src/threadpool-legacy.c - platform/external/pthreadpool - Git at Google

 /* Standard C headers */
 #include <stddef.h>

 /* Dependencies */
 #include <fxdiv.h>

 /* Library header */
 #include <pthreadpool.h>


 /*
  * Returns dividend / divisor rounded up to the next integer.
  * The quotient and the "has remainder" bit are computed separately, so no
  * intermediate sum of dividend and divisor is formed.
  * divisor must be non-zero.
  */
 static inline size_t divide_round_up(size_t dividend, size_t divisor) {
 	const size_t quotient = dividend / divisor;
 	const size_t has_remainder = (dividend % divisor != 0) ? 1 : 0;
 	return quotient + has_remainder;
 }

 /* Returns the smaller of the two size_t values. */
 static inline size_t min(size_t a, size_t b) {
 	if (b <= a) {
 		return b;
 	}
 	return a;
 }

 /*
  * Legacy 1D entry point. Forwards to pthreadpool_parallelize_1d with the
  * callback cast to pthreadpool_task_1d_t, the argument and range passed
  * through unchanged, and flags set to 0.
  *
  * NOTE(review): the cast assumes pthreadpool_function_1d_t and
  * pthreadpool_task_1d_t have identical signatures -- confirm in pthreadpool.h.
  */
 void pthreadpool_compute_1d(
 	pthreadpool_t threadpool,
 	pthreadpool_function_1d_t function,
 	void* argument,
 	size_t range)
 {
 	pthreadpool_parallelize_1d(threadpool,
 		(pthreadpool_task_1d_t) function, argument,
 		range, 0 /* flags */);
 }

 /*
  * Legacy 1D tiled entry point. Forwards to pthreadpool_parallelize_1d_tile_1d
  * with the callback cast to pthreadpool_task_1d_tile_1d_t, the argument,
  * range and tile passed through unchanged, and flags set to 0.
  *
  * NOTE(review): the cast assumes pthreadpool_function_1d_tiled_t and
  * pthreadpool_task_1d_tile_1d_t have identical signatures -- confirm in
  * pthreadpool.h.
  */
 void pthreadpool_compute_1d_tiled(
 	pthreadpool_t threadpool,
 	pthreadpool_function_1d_tiled_t function,
 	void* argument,
 	size_t range,
 	size_t tile)
 {
 	pthreadpool_parallelize_1d_tile_1d(threadpool,
 		(pthreadpool_task_1d_tile_1d_t) function, argument,
 		range, tile, 0 /* flags */);
 }

 /*
  * Legacy 2D entry point. Forwards to pthreadpool_parallelize_2d with the
  * callback cast to pthreadpool_task_2d_t, the argument and both ranges
  * passed through unchanged, and flags set to 0.
  *
  * NOTE(review): the cast assumes pthreadpool_function_2d_t and
  * pthreadpool_task_2d_t have identical signatures -- confirm in pthreadpool.h.
  */
 void pthreadpool_compute_2d(
 	pthreadpool_t threadpool,
 	pthreadpool_function_2d_t function,
 	void* argument,
 	size_t range_i,
 	size_t range_j)
 {
 	pthreadpool_parallelize_2d(threadpool,
 		(pthreadpool_task_2d_t) function, argument,
 		range_i, range_j, 0 /* flags */);
 }

 /*
  * Legacy 2D tiled entry point. Forwards to pthreadpool_parallelize_2d_tile_2d
  * with the callback cast to pthreadpool_task_2d_tile_2d_t, the argument,
  * ranges and tile sizes passed through unchanged, and flags set to 0.
  *
  * NOTE(review): the cast assumes pthreadpool_function_2d_tiled_t and
  * pthreadpool_task_2d_tile_2d_t have identical signatures -- confirm in
  * pthreadpool.h.
  */
 void pthreadpool_compute_2d_tiled(
 	pthreadpool_t threadpool,
 	pthreadpool_function_2d_tiled_t function,
 	void* argument,
 	size_t range_i,
 	size_t range_j,
 	size_t tile_i,
 	size_t tile_j)
 {
 	pthreadpool_parallelize_2d_tile_2d(threadpool,
 		(pthreadpool_task_2d_tile_2d_t) function, argument,
 		range_i, range_j, tile_i, tile_j, 0 /* flags */);
 }

 /*
  * State handed to compute_3d_tiled for one pthreadpool_compute_3d_tiled call:
  * the user callback plus everything needed to turn a linear tile index back
  * into 3D tile coordinates and extents.
  */
 struct compute_3d_tiled_context {
 	/* User callback invoked once per tile */
 	pthreadpool_function_3d_tiled_t function;
 	/* Opaque pointer forwarded as the callback's first argument */
 	void* argument;
 	/* fxdiv divisor initialized from the number of tiles along j */
 	struct fxdiv_divisor_size_t tile_range_j;
 	/* fxdiv divisor initialized from the number of tiles along k */
 	struct fxdiv_divisor_size_t tile_range_k;
 	/* Full extent of the iteration space in each dimension */
 	size_t range_i;
 	size_t range_j;
 	size_t range_k;
 	/* Maximum tile extent in each dimension; edge tiles are clamped to the range */
 	size_t tile_i;
 	size_t tile_j;
 	size_t tile_k;
 };

 /*
  * Task body used by pthreadpool_compute_3d_tiled: decodes a linearized tile
  * index into (i, j, k) tile coordinates and invokes the user callback on
  * that tile.
  *
  * The context is taken as void* so that this definition has the same type
  * as the task pointer it is cast to and called through
  * (pthreadpool_task_1d_t, expected to be void (*)(void*, size_t) -- confirm
  * against pthreadpool.h). Calling a function through a pointer to an
  * incompatible function type is undefined behavior in C.
  *
  * context_ptr   points to a struct compute_3d_tiled_context (read-only here)
  * linear_index  tile number in [0, tiles_i * tiles_j * tiles_k), with k
  *               varying fastest, then j, then i
  */
 static void compute_3d_tiled(void* context_ptr, size_t linear_index) {
 	const struct compute_3d_tiled_context* context = context_ptr;

 	/* Split off the k tile index (remainder); the quotient still combines i and j */
 	const struct fxdiv_divisor_size_t tile_range_k = context->tile_range_k;
 	const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k);
 	/* Split the combined value into the i (quotient) and j (remainder) tile indices */
 	const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
 	const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j);

 	const size_t max_tile_i = context->tile_i;
 	const size_t max_tile_j = context->tile_j;
 	const size_t max_tile_k = context->tile_k;

 	/* Tile index -> starting element index in each dimension */
 	const size_t index_i = tile_index_i_j.quotient * max_tile_i;
 	const size_t index_j = tile_index_i_j.remainder * max_tile_j;
 	const size_t index_k = tile_index_ij_k.remainder * max_tile_k;

 	/* Clamp the extent of tiles that would run past the end of a range */
 	const size_t tile_i = min(max_tile_i, context->range_i - index_i);
 	const size_t tile_j = min(max_tile_j, context->range_j - index_j);
 	const size_t tile_k = min(max_tile_k, context->range_k - index_k);

 	context->function(context->argument, index_i, index_j, index_k, tile_i, tile_j, tile_k);
 }

 /*
  * Legacy 3D tiled entry point. Invokes function once per tile of the
  * range_i x range_j x range_k iteration space, passing the tile's starting
  * indices followed by its extents (clamped at the upper edge of each range).
  *
  * When pthreadpool_get_threads_count reports more than one thread, the tile
  * grid is linearized and dispatched through pthreadpool_parallelize_1d with
  * compute_3d_tiled as the task; otherwise the tiles are visited by nested
  * loops on the calling thread.
  */
 void pthreadpool_compute_3d_tiled(
 	pthreadpool_t threadpool,
 	pthreadpool_function_3d_tiled_t function,
 	void* argument,
 	size_t range_i,
 	size_t range_j,
 	size_t range_k,
 	size_t tile_i,
 	size_t tile_j,
 	size_t tile_k)
 {
 	if (pthreadpool_get_threads_count(threadpool) > 1) {
 		/* Parallel path: one 1D work item per tile */
 		const size_t tiles_i = divide_round_up(range_i, tile_i);
 		const size_t tiles_j = divide_round_up(range_j, tile_j);
 		const size_t tiles_k = divide_round_up(range_k, tile_k);
 		struct compute_3d_tiled_context task_context = {
 			.function = function,
 			.argument = argument,
 			.tile_range_j = fxdiv_init_size_t(tiles_j),
 			.tile_range_k = fxdiv_init_size_t(tiles_k),
 			.range_i = range_i,
 			.range_j = range_j,
 			.range_k = range_k,
 			.tile_i = tile_i,
 			.tile_j = tile_j,
 			.tile_k = tile_k
 		};
 		pthreadpool_parallelize_1d(threadpool,
 			(pthreadpool_task_1d_t) compute_3d_tiled, &task_context,
 			tiles_i * tiles_j * tiles_k,
 			0 /* flags */);
 		return;
 	}

 	/* Sequential path: walk the tiles in i, j, k order on the calling thread */
 	for (size_t start_i = 0; start_i < range_i; start_i += tile_i) {
 		const size_t extent_i = min(range_i - start_i, tile_i);
 		for (size_t start_j = 0; start_j < range_j; start_j += tile_j) {
 			const size_t extent_j = min(range_j - start_j, tile_j);
 			for (size_t start_k = 0; start_k < range_k; start_k += tile_k) {
 				const size_t extent_k = min(range_k - start_k, tile_k);
 				function(argument, start_i, start_j, start_k, extent_i, extent_j, extent_k);
 			}
 		}
 	}
 }

 /*
  * State handed to compute_4d_tiled for one pthreadpool_compute_4d_tiled call:
  * the user callback plus everything needed to turn a linear tile index back
  * into 4D tile coordinates and extents.
  */
 struct compute_4d_tiled_context {
 	/* User callback invoked once per tile */
 	pthreadpool_function_4d_tiled_t function;
 	/* Opaque pointer forwarded as the callback's first argument */
 	void* argument;
 	/* fxdiv divisor initialized from (tiles along k) * (tiles along l) */
 	struct fxdiv_divisor_size_t tile_range_kl;
 	/* fxdiv divisor initialized from the number of tiles along j */
 	struct fxdiv_divisor_size_t tile_range_j;
 	/* fxdiv divisor initialized from the number of tiles along l */
 	struct fxdiv_divisor_size_t tile_range_l;
 	/* Full extent of the iteration space in each dimension */
 	size_t range_i;
 	size_t range_j;
 	size_t range_k;
 	size_t range_l;
 	/* Maximum tile extent in each dimension; edge tiles are clamped to the range */
 	size_t tile_i;
 	size_t tile_j;
 	size_t tile_k;
 	size_t tile_l;
 };

 /*
  * Task body used by pthreadpool_compute_4d_tiled: decodes a linearized tile
  * index into (i, j, k, l) tile coordinates and invokes the user callback on
  * that tile.
  *
  * The context is taken as void* so that this definition has the same type
  * as the task pointer it is cast to and called through
  * (pthreadpool_task_1d_t, expected to be void (*)(void*, size_t) -- confirm
  * against pthreadpool.h). Calling a function through a pointer to an
  * incompatible function type is undefined behavior in C.
  *
  * context_ptr   points to a struct compute_4d_tiled_context (read-only here)
  * linear_index  tile number in [0, tiles_i * tiles_j * tiles_k * tiles_l),
  *               with l varying fastest, then k, then j, then i
  */
 static void compute_4d_tiled(void* context_ptr, size_t linear_index) {
 	const struct compute_4d_tiled_context* context = context_ptr;

 	/* First split: quotient combines (i, j), remainder combines (k, l) */
 	const struct fxdiv_divisor_size_t tile_range_kl = context->tile_range_kl;
 	const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl);
 	/* (i, j) pair -> i tile index (quotient) and j tile index (remainder) */
 	const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
 	const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, tile_range_j);
 	/* (k, l) pair -> k tile index (quotient) and l tile index (remainder) */
 	const struct fxdiv_divisor_size_t tile_range_l = context->tile_range_l;
 	const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l);

 	const size_t max_tile_i = context->tile_i;
 	const size_t max_tile_j = context->tile_j;
 	const size_t max_tile_k = context->tile_k;
 	const size_t max_tile_l = context->tile_l;

 	/* Tile index -> starting element index in each dimension */
 	const size_t index_i = tile_index_i_j.quotient * max_tile_i;
 	const size_t index_j = tile_index_i_j.remainder * max_tile_j;
 	const size_t index_k = tile_index_k_l.quotient * max_tile_k;
 	const size_t index_l = tile_index_k_l.remainder * max_tile_l;

 	/* Clamp the extent of tiles that would run past the end of a range */
 	const size_t tile_i = min(max_tile_i, context->range_i - index_i);
 	const size_t tile_j = min(max_tile_j, context->range_j - index_j);
 	const size_t tile_k = min(max_tile_k, context->range_k - index_k);
 	const size_t tile_l = min(max_tile_l, context->range_l - index_l);

 	context->function(context->argument, index_i, index_j, index_k, index_l, tile_i, tile_j, tile_k, tile_l);
 }

 /*
  * Legacy 4D tiled entry point. Invokes function once per tile of the
  * range_i x range_j x range_k x range_l iteration space, passing the tile's
  * starting indices followed by its extents (clamped at the upper edge of
  * each range).
  *
  * When pthreadpool_get_threads_count reports more than one thread, the tile
  * grid is linearized and dispatched through pthreadpool_parallelize_1d with
  * compute_4d_tiled as the task; otherwise the tiles are visited by nested
  * loops on the calling thread.
  */
 void pthreadpool_compute_4d_tiled(
 	pthreadpool_t threadpool,
 	pthreadpool_function_4d_tiled_t function,
 	void* argument,
 	size_t range_i,
 	size_t range_j,
 	size_t range_k,
 	size_t range_l,
 	size_t tile_i,
 	size_t tile_j,
 	size_t tile_k,
 	size_t tile_l)
 {
 	if (pthreadpool_get_threads_count(threadpool) > 1) {
 		/* Parallel path: one 1D work item per tile */
 		const size_t tiles_i = divide_round_up(range_i, tile_i);
 		const size_t tiles_j = divide_round_up(range_j, tile_j);
 		const size_t tiles_k = divide_round_up(range_k, tile_k);
 		const size_t tiles_l = divide_round_up(range_l, tile_l);
 		struct compute_4d_tiled_context task_context = {
 			.function = function,
 			.argument = argument,
 			.tile_range_kl = fxdiv_init_size_t(tiles_k * tiles_l),
 			.tile_range_j = fxdiv_init_size_t(tiles_j),
 			.tile_range_l = fxdiv_init_size_t(tiles_l),
 			.range_i = range_i,
 			.range_j = range_j,
 			.range_k = range_k,
 			.range_l = range_l,
 			.tile_i = tile_i,
 			.tile_j = tile_j,
 			.tile_k = tile_k,
 			.tile_l = tile_l
 		};
 		pthreadpool_parallelize_1d(threadpool,
 			(pthreadpool_task_1d_t) compute_4d_tiled, &task_context,
 			tiles_i * tiles_j * tiles_k * tiles_l,
 			0 /* flags */);
 		return;
 	}

 	/* Sequential path: walk the tiles in i, j, k, l order on the calling thread */
 	for (size_t start_i = 0; start_i < range_i; start_i += tile_i) {
 		const size_t extent_i = min(range_i - start_i, tile_i);
 		for (size_t start_j = 0; start_j < range_j; start_j += tile_j) {
 			const size_t extent_j = min(range_j - start_j, tile_j);
 			for (size_t start_k = 0; start_k < range_k; start_k += tile_k) {
 				const size_t extent_k = min(range_k - start_k, tile_k);
 				for (size_t start_l = 0; start_l < range_l; start_l += tile_l) {
 					const size_t extent_l = min(range_l - start_l, tile_l);
 					function(argument, start_i, start_j, start_k, start_l,
 						extent_i, extent_j, extent_k, extent_l);
 				}
 			}
 		}
 	}
 }
	/* Standard C headers */
	#include <stddef.h>

	/* Dependencies */
	#include <fxdiv.h>

	/* Library header */
	#include <pthreadpool.h>


	/*
	 * Returns dividend / divisor rounded up to the next integer.
	 * The quotient and the "has remainder" bit are computed separately, so no
	 * intermediate sum of dividend and divisor is formed.
	 * divisor must be non-zero.
	 */
	static inline size_t divide_round_up(size_t dividend, size_t divisor) {
		const size_t quotient = dividend / divisor;
		const size_t has_remainder = (dividend % divisor != 0) ? 1 : 0;
		return quotient + has_remainder;
	}

	/* Returns the smaller of the two size_t values. */
	static inline size_t min(size_t a, size_t b) {
		if (b <= a) {
			return b;
		}
		return a;
	}

	/*
	 * Legacy 1D entry point. Forwards to pthreadpool_parallelize_1d with the
	 * callback cast to pthreadpool_task_1d_t, the argument and range passed
	 * through unchanged, and flags set to 0.
	 *
	 * NOTE(review): the cast assumes pthreadpool_function_1d_t and
	 * pthreadpool_task_1d_t have identical signatures -- confirm in pthreadpool.h.
	 */
	void pthreadpool_compute_1d(
	pthreadpool_t threadpool,
	pthreadpool_function_1d_t function,
	void* argument,
	size_t range)
	{
	pthreadpool_parallelize_1d(threadpool,
	(pthreadpool_task_1d_t) function, argument,
	range, 0 /* flags */);
	}

	/*
	 * Legacy 1D tiled entry point. Forwards to pthreadpool_parallelize_1d_tile_1d
	 * with the callback cast to pthreadpool_task_1d_tile_1d_t, the argument,
	 * range and tile passed through unchanged, and flags set to 0.
	 *
	 * NOTE(review): the cast assumes pthreadpool_function_1d_tiled_t and
	 * pthreadpool_task_1d_tile_1d_t have identical signatures -- confirm in
	 * pthreadpool.h.
	 */
	void pthreadpool_compute_1d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_1d_tiled_t function,
	void* argument,
	size_t range,
	size_t tile)
	{
	pthreadpool_parallelize_1d_tile_1d(threadpool,
	(pthreadpool_task_1d_tile_1d_t) function, argument,
	range, tile, 0 /* flags */);
	}

	/*
	 * Legacy 2D entry point. Forwards to pthreadpool_parallelize_2d with the
	 * callback cast to pthreadpool_task_2d_t, the argument and both ranges
	 * passed through unchanged, and flags set to 0.
	 *
	 * NOTE(review): the cast assumes pthreadpool_function_2d_t and
	 * pthreadpool_task_2d_t have identical signatures -- confirm in pthreadpool.h.
	 */
	void pthreadpool_compute_2d(
	pthreadpool_t threadpool,
	pthreadpool_function_2d_t function,
	void* argument,
	size_t range_i,
	size_t range_j)
	{
	pthreadpool_parallelize_2d(threadpool,
	(pthreadpool_task_2d_t) function, argument,
	range_i, range_j, 0 /* flags */);
	}

	/*
	 * Legacy 2D tiled entry point. Forwards to pthreadpool_parallelize_2d_tile_2d
	 * with the callback cast to pthreadpool_task_2d_tile_2d_t, the argument,
	 * ranges and tile sizes passed through unchanged, and flags set to 0.
	 *
	 * NOTE(review): the cast assumes pthreadpool_function_2d_tiled_t and
	 * pthreadpool_task_2d_tile_2d_t have identical signatures -- confirm in
	 * pthreadpool.h.
	 */
	void pthreadpool_compute_2d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_2d_tiled_t function,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t tile_i,
	size_t tile_j)
	{
	pthreadpool_parallelize_2d_tile_2d(threadpool,
	(pthreadpool_task_2d_tile_2d_t) function, argument,
	range_i, range_j, tile_i, tile_j, 0 /* flags */);
	}

	/*
	 * State handed to compute_3d_tiled for one pthreadpool_compute_3d_tiled call:
	 * the user callback plus everything needed to turn a linear tile index back
	 * into 3D tile coordinates and extents.
	 */
	struct compute_3d_tiled_context {
	/* User callback invoked once per tile */
	pthreadpool_function_3d_tiled_t function;
	/* Opaque pointer forwarded as the callback's first argument */
	void* argument;
	/* fxdiv divisor initialized from the number of tiles along j */
	struct fxdiv_divisor_size_t tile_range_j;
	/* fxdiv divisor initialized from the number of tiles along k */
	struct fxdiv_divisor_size_t tile_range_k;
	/* Full extent of the iteration space in each dimension */
	size_t range_i;
	size_t range_j;
	size_t range_k;
	/* Maximum tile extent in each dimension; edge tiles are clamped to the range */
	size_t tile_i;
	size_t tile_j;
	size_t tile_k;
	};

	/*
	 * Task body used by pthreadpool_compute_3d_tiled: decodes a linearized tile
	 * index into (i, j, k) tile coordinates and invokes the user callback on
	 * that tile.
	 *
	 * The context is taken as void* so that this definition has the same type
	 * as the task pointer it is cast to and called through
	 * (pthreadpool_task_1d_t, expected to be void (*)(void*, size_t) -- confirm
	 * against pthreadpool.h). Calling a function through a pointer to an
	 * incompatible function type is undefined behavior in C.
	 *
	 * context_ptr   points to a struct compute_3d_tiled_context (read-only here)
	 * linear_index  tile number in [0, tiles_i * tiles_j * tiles_k), with k
	 *               varying fastest, then j, then i
	 */
	static void compute_3d_tiled(void* context_ptr, size_t linear_index) {
		const struct compute_3d_tiled_context* context = context_ptr;

		/* Split off the k tile index (remainder); the quotient still combines i and j */
		const struct fxdiv_divisor_size_t tile_range_k = context->tile_range_k;
		const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k);
		/* Split the combined value into the i (quotient) and j (remainder) tile indices */
		const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
		const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j);

		const size_t max_tile_i = context->tile_i;
		const size_t max_tile_j = context->tile_j;
		const size_t max_tile_k = context->tile_k;

		/* Tile index -> starting element index in each dimension */
		const size_t index_i = tile_index_i_j.quotient * max_tile_i;
		const size_t index_j = tile_index_i_j.remainder * max_tile_j;
		const size_t index_k = tile_index_ij_k.remainder * max_tile_k;

		/* Clamp the extent of tiles that would run past the end of a range */
		const size_t tile_i = min(max_tile_i, context->range_i - index_i);
		const size_t tile_j = min(max_tile_j, context->range_j - index_j);
		const size_t tile_k = min(max_tile_k, context->range_k - index_k);

		context->function(context->argument, index_i, index_j, index_k, tile_i, tile_j, tile_k);
	}

	/*
	 * Legacy 3D tiled entry point. Invokes function once per tile of the
	 * range_i x range_j x range_k iteration space, passing the tile's starting
	 * indices followed by its extents (clamped at the upper edge of each range).
	 *
	 * When pthreadpool_get_threads_count reports more than one thread, the tile
	 * grid is linearized and dispatched through pthreadpool_parallelize_1d with
	 * compute_3d_tiled as the task; otherwise the tiles are visited by nested
	 * loops on the calling thread.
	 */
	void pthreadpool_compute_3d_tiled(
		pthreadpool_t threadpool,
		pthreadpool_function_3d_tiled_t function,
		void* argument,
		size_t range_i,
		size_t range_j,
		size_t range_k,
		size_t tile_i,
		size_t tile_j,
		size_t tile_k)
	{
		if (pthreadpool_get_threads_count(threadpool) > 1) {
			/* Parallel path: one 1D work item per tile */
			const size_t tiles_i = divide_round_up(range_i, tile_i);
			const size_t tiles_j = divide_round_up(range_j, tile_j);
			const size_t tiles_k = divide_round_up(range_k, tile_k);
			struct compute_3d_tiled_context task_context = {
				.function = function,
				.argument = argument,
				.tile_range_j = fxdiv_init_size_t(tiles_j),
				.tile_range_k = fxdiv_init_size_t(tiles_k),
				.range_i = range_i,
				.range_j = range_j,
				.range_k = range_k,
				.tile_i = tile_i,
				.tile_j = tile_j,
				.tile_k = tile_k
			};
			pthreadpool_parallelize_1d(threadpool,
				(pthreadpool_task_1d_t) compute_3d_tiled, &task_context,
				tiles_i * tiles_j * tiles_k,
				0 /* flags */);
			return;
		}

		/* Sequential path: walk the tiles in i, j, k order on the calling thread */
		for (size_t start_i = 0; start_i < range_i; start_i += tile_i) {
			const size_t extent_i = min(range_i - start_i, tile_i);
			for (size_t start_j = 0; start_j < range_j; start_j += tile_j) {
				const size_t extent_j = min(range_j - start_j, tile_j);
				for (size_t start_k = 0; start_k < range_k; start_k += tile_k) {
					const size_t extent_k = min(range_k - start_k, tile_k);
					function(argument, start_i, start_j, start_k, extent_i, extent_j, extent_k);
				}
			}
		}
	}

	/*
	 * State handed to compute_4d_tiled for one pthreadpool_compute_4d_tiled call:
	 * the user callback plus everything needed to turn a linear tile index back
	 * into 4D tile coordinates and extents.
	 */
	struct compute_4d_tiled_context {
	/* User callback invoked once per tile */
	pthreadpool_function_4d_tiled_t function;
	/* Opaque pointer forwarded as the callback's first argument */
	void* argument;
	/* fxdiv divisor initialized from (tiles along k) * (tiles along l) */
	struct fxdiv_divisor_size_t tile_range_kl;
	/* fxdiv divisor initialized from the number of tiles along j */
	struct fxdiv_divisor_size_t tile_range_j;
	/* fxdiv divisor initialized from the number of tiles along l */
	struct fxdiv_divisor_size_t tile_range_l;
	/* Full extent of the iteration space in each dimension */
	size_t range_i;
	size_t range_j;
	size_t range_k;
	size_t range_l;
	/* Maximum tile extent in each dimension; edge tiles are clamped to the range */
	size_t tile_i;
	size_t tile_j;
	size_t tile_k;
	size_t tile_l;
	};

	/*
	 * Task body used by pthreadpool_compute_4d_tiled: decodes a linearized tile
	 * index into (i, j, k, l) tile coordinates and invokes the user callback on
	 * that tile.
	 *
	 * The context is taken as void* so that this definition has the same type
	 * as the task pointer it is cast to and called through
	 * (pthreadpool_task_1d_t, expected to be void (*)(void*, size_t) -- confirm
	 * against pthreadpool.h). Calling a function through a pointer to an
	 * incompatible function type is undefined behavior in C.
	 *
	 * context_ptr   points to a struct compute_4d_tiled_context (read-only here)
	 * linear_index  tile number in [0, tiles_i * tiles_j * tiles_k * tiles_l),
	 *               with l varying fastest, then k, then j, then i
	 */
	static void compute_4d_tiled(void* context_ptr, size_t linear_index) {
		const struct compute_4d_tiled_context* context = context_ptr;

		/* First split: quotient combines (i, j), remainder combines (k, l) */
		const struct fxdiv_divisor_size_t tile_range_kl = context->tile_range_kl;
		const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl);
		/* (i, j) pair -> i tile index (quotient) and j tile index (remainder) */
		const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
		const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, tile_range_j);
		/* (k, l) pair -> k tile index (quotient) and l tile index (remainder) */
		const struct fxdiv_divisor_size_t tile_range_l = context->tile_range_l;
		const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l);

		const size_t max_tile_i = context->tile_i;
		const size_t max_tile_j = context->tile_j;
		const size_t max_tile_k = context->tile_k;
		const size_t max_tile_l = context->tile_l;

		/* Tile index -> starting element index in each dimension */
		const size_t index_i = tile_index_i_j.quotient * max_tile_i;
		const size_t index_j = tile_index_i_j.remainder * max_tile_j;
		const size_t index_k = tile_index_k_l.quotient * max_tile_k;
		const size_t index_l = tile_index_k_l.remainder * max_tile_l;

		/* Clamp the extent of tiles that would run past the end of a range */
		const size_t tile_i = min(max_tile_i, context->range_i - index_i);
		const size_t tile_j = min(max_tile_j, context->range_j - index_j);
		const size_t tile_k = min(max_tile_k, context->range_k - index_k);
		const size_t tile_l = min(max_tile_l, context->range_l - index_l);

		context->function(context->argument, index_i, index_j, index_k, index_l, tile_i, tile_j, tile_k, tile_l);
	}

	/*
	 * Legacy 4D tiled entry point. Invokes function once per tile of the
	 * range_i x range_j x range_k x range_l iteration space, passing the tile's
	 * starting indices followed by its extents (clamped at the upper edge of
	 * each range).
	 *
	 * When pthreadpool_get_threads_count reports more than one thread, the tile
	 * grid is linearized and dispatched through pthreadpool_parallelize_1d with
	 * compute_4d_tiled as the task; otherwise the tiles are visited by nested
	 * loops on the calling thread.
	 */
	void pthreadpool_compute_4d_tiled(
		pthreadpool_t threadpool,
		pthreadpool_function_4d_tiled_t function,
		void* argument,
		size_t range_i,
		size_t range_j,
		size_t range_k,
		size_t range_l,
		size_t tile_i,
		size_t tile_j,
		size_t tile_k,
		size_t tile_l)
	{
		if (pthreadpool_get_threads_count(threadpool) > 1) {
			/* Parallel path: one 1D work item per tile */
			const size_t tiles_i = divide_round_up(range_i, tile_i);
			const size_t tiles_j = divide_round_up(range_j, tile_j);
			const size_t tiles_k = divide_round_up(range_k, tile_k);
			const size_t tiles_l = divide_round_up(range_l, tile_l);
			struct compute_4d_tiled_context task_context = {
				.function = function,
				.argument = argument,
				.tile_range_kl = fxdiv_init_size_t(tiles_k * tiles_l),
				.tile_range_j = fxdiv_init_size_t(tiles_j),
				.tile_range_l = fxdiv_init_size_t(tiles_l),
				.range_i = range_i,
				.range_j = range_j,
				.range_k = range_k,
				.range_l = range_l,
				.tile_i = tile_i,
				.tile_j = tile_j,
				.tile_k = tile_k,
				.tile_l = tile_l
			};
			pthreadpool_parallelize_1d(threadpool,
				(pthreadpool_task_1d_t) compute_4d_tiled, &task_context,
				tiles_i * tiles_j * tiles_k * tiles_l,
				0 /* flags */);
			return;
		}

		/* Sequential path: walk the tiles in i, j, k, l order on the calling thread */
		for (size_t start_i = 0; start_i < range_i; start_i += tile_i) {
			const size_t extent_i = min(range_i - start_i, tile_i);
			for (size_t start_j = 0; start_j < range_j; start_j += tile_j) {
				const size_t extent_j = min(range_j - start_j, tile_j);
				for (size_t start_k = 0; start_k < range_k; start_k += tile_k) {
					const size_t extent_k = min(range_k - start_k, tile_k);
					for (size_t start_l = 0; start_l < range_l; start_l += tile_l) {
						const size_t extent_l = min(range_l - start_l, tile_l);
						function(argument, start_i, start_j, start_k, start_l,
							extent_i, extent_j, extent_k, extent_l);
					}
				}
			}
		}
	}