examples/models/phi-3-mini/runner.h - platform/external/executorch - Git at Google

 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */

 // A simple phi-3-mini runner that includes preprocessing and postprocessing
 // logic. The module takes in a string as input and emits a string as output.

 #pragma once

 #include <cstddef>
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>

 #include <executorch/extension/llm/sampler/sampler.h>
 #include <executorch/extension/llm/tokenizer/tokenizer.h>
 #include <executorch/extension/module/module.h>
 #include <executorch/runtime/core/exec_aten/exec_aten.h>

 namespace torch::executor {

 /**
  * Text-generation driver for the phi-3-mini example.
  *
  * The only public operation is generate(). Everything else is a private
  * helper or one of the exclusively owned module / tokenizer / sampler
  * objects.
  */
 class Runner {
  public:
   /**
    * Builds a runner from a model location and a tokenizer location.
    *
    * @param[in] model_path Path identifying the model. Presumably handed to
    * the owned Module -- confirm against the implementation file.
    * @param[in] tokenizer_path Path identifying the tokenizer data. Presumably
    * consumed by the owned Tokenizer -- confirm against the implementation.
    * @param[in] temperature Defaults to 0.8 when omitted. Presumably the
    * sampling temperature forwarded to the owned Sampler -- confirm.
    */
   explicit Runner(
       const std::string& model_path,
       const std::string& tokenizer_path,
       const float temperature = 0.8f);

   /**
    * Produces a response for the supplied prompt.
    *
    * @param[in] prompt Text to respond to.
    * @param[in] max_seq_len Upper bound on the total sequence length; the
    * prompt itself counts towards this limit.
    */
   void generate(const std::string& prompt, std::size_t max_seq_len);

  private:
   // Internal helpers; each returns a 64-bit unsigned value.
   // NOTE(review): judging by the names, prefill() consumes the prompt
   // tokens, run_model_step() advances the model by a single token, and
   // logits_to_token() turns a logits tensor into a token id -- confirm
   // against the definitions, which are not part of this header.
   uint64_t prefill(std::vector<uint64_t>& tokens);
   uint64_t run_model_step(uint64_t token);
   uint64_t logits_to_token(const exec_aten::Tensor& logits_tensor);

  private:
   // Exclusively owned collaborators. Keep this declaration order: members
   // are constructed in this order and destroyed in the reverse order.
   std::unique_ptr<Module> module_;
   std::unique_ptr<Tokenizer> tokenizer_;
   std::unique_ptr<Sampler> sampler_;
 };

 } // namespace torch::executor
	/*
	* Copyright (c) Meta Platforms, Inc. and affiliates.
	* All rights reserved.
	*
	* This source code is licensed under the BSD-style license found in the
	* LICENSE file in the root directory of this source tree.
	*/

	// A simple phi-3-mini runner that includes preprocessing and postprocessing
	// logic. The module takes in a string as input and emits a string as output.

	#pragma once

	#include <memory>
	#include <string>

	#include <executorch/extension/llm/sampler/sampler.h>
	#include <executorch/extension/llm/tokenizer/tokenizer.h>
	#include <executorch/extension/module/module.h>
	#include <executorch/runtime/core/exec_aten/exec_aten.h>

	namespace torch::executor {

	/**
	 * Text-generation driver for the phi-3-mini example.
	 *
	 * The only public operation is generate(). Everything else is a private
	 * helper or one of the exclusively owned module / tokenizer / sampler
	 * objects.
	 */
	class Runner {
	 public:
	  /**
	   * Builds a runner from a model location and a tokenizer location.
	   *
	   * @param[in] model_path Path identifying the model. Presumably handed to
	   * the owned Module -- confirm against the implementation file.
	   * @param[in] tokenizer_path Path identifying the tokenizer data.
	   * Presumably consumed by the owned Tokenizer -- confirm against the
	   * implementation.
	   * @param[in] temperature Defaults to 0.8 when omitted. Presumably the
	   * sampling temperature forwarded to the owned Sampler -- confirm.
	   */
	  explicit Runner(
	      const std::string& model_path,
	      const std::string& tokenizer_path,
	      const float temperature = 0.8f);

	  /**
	   * Produces a response for the supplied prompt.
	   *
	   * @param[in] prompt Text to respond to.
	   * @param[in] max_seq_len Upper bound on the total sequence length; the
	   * prompt itself counts towards this limit.
	   */
	  void generate(const std::string& prompt, std::size_t max_seq_len);

	 private:
	  // Internal helpers; each returns a 64-bit unsigned value.
	  // NOTE(review): judging by the names, prefill() consumes the prompt
	  // tokens, run_model_step() advances the model by a single token, and
	  // logits_to_token() turns a logits tensor into a token id -- confirm
	  // against the definitions, which are not part of this header.
	  uint64_t prefill(std::vector<uint64_t>& tokens);
	  uint64_t run_model_step(uint64_t token);
	  uint64_t logits_to_token(const exec_aten::Tensor& logits_tensor);

	 private:
	  // Exclusively owned collaborators. Keep this declaration order: members
	  // are constructed in this order and destroyed in the reverse order.
	  std::unique_ptr<Module> module_;
	  std::unique_ptr<Tokenizer> tokenizer_;
	  std::unique_ptr<Sampler> sampler_;
	};

	} // namespace torch::executor