blob: 15022751a80b764311c4f60198e2f24cc73d96c3 [file] [log] [blame]
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
// A simple phi-3-mini runner that includes preprocessing and postprocessing
// logic. The module takes in a string as input and emits a string as output.
#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include <executorch/extension/llm/sampler/sampler.h>
#include <executorch/extension/llm/tokenizer/tokenizer.h>
#include <executorch/extension/module/module.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
namespace torch::executor {
/**
 * Runner for the phi-3-mini model: accepts a prompt string and drives
 * generation through a Module, a Tokenizer and a Sampler that it owns.
 *
 * The three collaborators are held by std::unique_ptr, so a Runner cannot be
 * copied.
 */
class Runner {
 public:
  /**
   * Creates a runner.
   *
   * @param[in] model_path Path identifying the model to run.
   * @param[in] tokenizer_path Path identifying the tokenizer data.
   * @param[in] temperature Sampling temperature; defaults to 0.8.
   *     NOTE(review): presumably forwarded to the Sampler -- confirm in the
   *     implementation file.
   */
  explicit Runner(
      const std::string& model_path,
      const std::string& tokenizer_path,
      float temperature = 0.8f);

  /**
   * Generates response for a given prompt.
   *
   * Returns nothing; how the generated text is delivered is decided by the
   * implementation and is not visible from this declaration.
   *
   * @param[in] prompt The prompt to generate a response for.
   * @param[in] max_seq_len The maximum length of the sequence to generate,
   * including prompt.
   */
  void generate(const std::string& prompt, std::size_t max_seq_len);

 private:
  // Maps a logits tensor to a single token id.
  // NOTE(review): presumably picks the token via sampler_ -- confirm in the
  // implementation file.
  uint64_t logits_to_token(const exec_aten::Tensor& logits_tensor);

  // Processes a sequence of token ids and returns one token id.
  // NOTE(review): presumably runs the prompt tokens through the model and
  // returns the first generated token; `tokens` is taken by non-const
  // reference, so the implementation may modify it -- confirm.
  uint64_t prefill(std::vector<uint64_t>& tokens);

  // Takes one token id and returns one token id.
  // NOTE(review): presumably a single decode step that yields the next
  // token -- confirm in the implementation file.
  uint64_t run_model_step(uint64_t token);

  // Owned collaborators. Declaration order is kept as-is because members are
  // destroyed in reverse order of declaration.
  std::unique_ptr<Module> module_;
  std::unique_ptr<Tokenizer> tokenizer_;
  std::unique_ptr<Sampler> sampler_;
};
} // namespace torch::executor