from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core, queue_util
from caffe2.python.dataio import Reader, Writer


def processor_step(
        reader, writer, num_threads=1, processor=None, name='processor'):
    """
    Given a reader and a writer, couple them through a processor, running
    across multiple threads.

    Args:
        reader: an instance of dataio.Reader
        writer: an instance of dataio.Writer
        num_threads: number of processing threads
        processor: if provided, a function of the form:
                (nets, out_record) = processor(record)
            where `record` is a schema.Struct containing the input,
            `nets` is the list of nets performing the transformation, and
            `out_record` is a schema.Struct with the transformed data
            (a sample processor is sketched below this function).
        name: name to be given to the nets and execution steps created.

    Returns:
        Execution step that runs all threads of the processor in parallel.
    """
    assert isinstance(reader, Reader)
    assert isinstance(writer, Writer)
    # Global init/exit nets run once, before and after all worker threads.
    global_init_net = core.Net(name + '_producer_global_init')
    global_exit_net = core.Net(name + '_producer_global_exit')
    reader.setup_ex(global_init_net, global_exit_net)
    writer.setup_ex(global_init_net, global_exit_net)

    def default_processor(fields):
        # Pass-through: no extra nets, output record equals input record.
        return [], fields

    if processor is None:
        processor = default_processor

    # Build one init/worker/exit step per thread; the worker step repeats
    # until the `status` blob signals it should stop.
    steps = []
    for thread_id in range(num_threads):
        init_net = core.Net(name + "_init_net_%d" % thread_id)
        exit_net = core.Net(name + "_exit_net_%d" % thread_id)
        read_nets, status, rec = reader.read_record_ex(init_net, exit_net)
        process_nets, rec = processor(rec)
        write_nets, _ = writer.write_record_ex(rec, init_net, exit_net, status)
        step = core.execution_step(
            name + "_thread_%d" % thread_id, [
                core.execution_step(name + "_init_step", init_net),
                core.execution_step(
                    name + "_worker_step",
                    list(read_nets) + list(process_nets) + list(write_nets),
                    should_stop_blob=status
                ),
                core.execution_step(name + "_exit_step", exit_net),
            ]
        )
        steps.append(step)
    # Global init, then all thread steps concurrently, then global exit.
    return core.execution_step(
        "sender_step", [
            core.execution_step('init_step', global_init_net),
            core.execution_step(
                "sender_steps", steps, concurrent_substeps=True),
            core.execution_step('finish_step', global_exit_net),
        ]
    )
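

# Example (not part of the original API): a minimal sketch of a `processor`
# callback that can be passed to `processor_step` or `LocalPipeline.link`.
# It copies every field of the input record to a new blob, i.e. an identity
# transformation. It assumes `schema.from_blob_list` and `Field.field_blobs()`
# behave as in caffe2.python.schema; the `_copy` suffix is arbitrary.
def _example_copy_processor(rec):
    from caffe2.python import schema  # local import keeps the sketch self-contained
    net = core.Net('example_copy_processor')
    out_blobs = [
        net.Copy(blob, str(blob) + '_copy') for blob in rec.field_blobs()]
    out_record = schema.from_blob_list(rec, out_blobs)
    return [net], out_record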


class LocalPipeline(object):
    """
    Create a data processing pipeline consisting of a sequence of
    multi-threaded processors communicating through queues.
    A usage sketch is given at the end of this module.
    """
    def __init__(self):
        self.tasks = []
        self.init_net = core.Net('worker_init')

    def create_queue(self, capacity, schema):
        """
        Create a queue that will be used to communicate between processors.

        Args:
            capacity: max number of records in the queue
            schema: a schema.Struct representing the schema of a record in
                the queue.

        Returns:
            A QueueWrapper containing a queue.
        """
        return queue_util.QueueWrapper(self.init_net, capacity, schema)

    def add_task(self, task):
        """
        Add a task to the pipeline.
        The task will run in parallel with the other tasks in the pipeline.
        """
        self.tasks.append(task)

    def link(self, reader, writer, num_threads=1, processor=None):
        """
        Add a task that will read from `reader` and write to `writer`.
        See `processor_step` above for a description of the arguments.
        """
        self.add_task(processor_step(reader, writer, num_threads, processor))

    def get_step(self):
        """
        Create and return a Caffe2 execution step that will run all the tasks
        of this pipeline in parallel.
        """
        return core.execution_step('worker_step', [
            core.execution_step('worker_init', self.init_net),
            core.execution_step(
                'tasks_step', self.tasks, concurrent_substeps=True)
        ])

    def get_step_and_output(self):
        """
        Return a tuple (execution_step, output) to be used as one of the tasks
        in a distributed pipeline.
        """
        # The pipeline has no meaningful output of its own, so expose a dummy
        # constant blob to satisfy the (step, outputs) interface.
        output = self.init_net.ConstantFill([], value=0.0)
        return self.get_step(), [output]
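

# Example (not part of the original API): a minimal sketch of chaining two
# stages of a LocalPipeline through an intermediate queue. `source_reader` and
# `sink_writer` stand for arbitrary dataio.Reader / dataio.Writer
# implementations supplied by the caller, and the `queue.reader()` /
# `queue.writer()` accessors are assumed to be provided by QueueWrapper;
# adapt them if the actual QueueWrapper interface differs.
def _example_local_pipeline(record_schema, source_reader, sink_writer):
    pipeline = LocalPipeline()
    # Bounded queue decoupling the reading stage from the writing stage.
    queue = pipeline.create_queue(capacity=100, schema=record_schema)
    # Stage 1: read from the source and enqueue, using 4 threads.
    pipeline.link(source_reader, queue.writer(), num_threads=4)
    # Stage 2: dequeue and write to the sink, using 4 threads.
    pipeline.link(queue.reader(), sink_writer, num_threads=4)
    # Single execution step running both stages concurrently.
    return pipeline.get_step()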