examples/models/llava/main.cpp - platform/external/executorch - Git at Google

 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */

 #include <executorch/examples/models/llava/runner/llava_runner.h>
 #include <gflags/gflags.h>
 #ifndef LLAVA_NO_TORCH_DUMMY_IMAGE
 #include <torch/torch.h>
 #else
 #include <algorithm> // std::fill
 #endif

 #if defined(ET_USE_THREADPOOL)
 #include <executorch/extension/threadpool/cpuinfo_utils.h>
 #include <executorch/extension/threadpool/threadpool.h>
 #endif

 // Command-line flags (gflags). Each DEFINE_* below creates a FLAGS_<name>
 // global that main() reads after gflags::ParseCommandLineFlags().

 // Path of the model file; handed to the LlavaRunner constructor in main().
 DEFINE_string(
     model_path,
     "llava.pte",
     "Model serialized in flatbuffer format.");

 // Path of the tokenizer file; handed to the LlavaRunner constructor in main().
 DEFINE_string(tokenizer_path, "tokenizer.bin", "Tokenizer stuff.");

 // Text prompt passed to runner.generate() together with the image.
 DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt.");

 // Image input. main() only reads this flag when LLAVA_NO_TORCH_DUMMY_IMAGE is
 // NOT defined, in which case the file is loaded with torch::load(). The default
 // is the empty string.
 DEFINE_string(
     image_path,
     "",
     "The path to a .pt file, a serialized torch tensor for an image, longest edge resized to 336.");

 // Sampling temperature forwarded to the LlavaRunner constructor.
 // NOTE(review): the flag is a double but the default literal is the float
 // 0.8f, so the stored default is the float-rounded value rather than exactly
 // 0.8 -- confirm whether that matters to the runner.
 DEFINE_double(
     temperature,
     0.8f,
     "Temperature; Default is 0.8f. 0 = greedy argmax sampling (deterministic). Lower temperature = more deterministic");

 // Sequence-length argument passed to runner.generate().
 // NOTE(review): the help text says "Defaults to max_seq_len", yet the default
 // defined here is the constant 1024 -- confirm which statement is intended.
 DEFINE_int32(
     seq_len,
     1024,
     "Total number of tokens to generate (prompt + output). Defaults to max_seq_len. If the number of input tokens + seq_len > max_seq_len, the output will be truncated to max_seq_len tokens.");

 // Thread count for the ExecuTorch threadpool. main() only acts on this flag
 // when ET_USE_THREADPOOL is defined; -1 selects get_num_performant_cores().
 DEFINE_int32(
     cpu_threads,
     -1,
     "Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");

 int32_t main(int32_t argc, char** argv) {
   gflags::ParseCommandLineFlags(&argc, &argv, true);

   // Create a loader to get the data of the program file. There are other
   // DataLoaders that use mmap() or point32_t to data that's already in memory,
   // and users can create their own DataLoaders to load from arbitrary sources.
   const char* model_path = FLAGS_model_path.c_str();

   const char* tokenizer_path = FLAGS_tokenizer_path.c_str();

   const char* prompt = FLAGS_prompt.c_str();

   std::string image_path = FLAGS_image_path;

   double temperature = FLAGS_temperature;

   int32_t seq_len = FLAGS_seq_len;

   int32_t cpu_threads = FLAGS_cpu_threads;

 #if defined(ET_USE_THREADPOOL)
   uint32_t num_performant_cores = cpu_threads == -1
       ? torch::executorch::cpuinfo::get_num_performant_cores()
       : static_cast<uint32_t>(cpu_threads);
   ET_LOG(
       Info, "Resetting threadpool with num threads = %d", num_performant_cores);
   if (num_performant_cores > 0) {
     torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
         num_performant_cores);
   }
 #endif
   // create llama runner
   torch::executor::LlavaRunner runner(model_path, tokenizer_path, temperature);

   // read image and resize the longest edge to 336
   std::vector<uint8_t> image_data;

 #ifdef LLAVA_NO_TORCH_DUMMY_IMAGE
   // Work without torch using a random data
   image_data.resize(3 * 240 * 336);
   std::fill(image_data.begin(), image_data.end(), 0); // black
   std::array<int32_t, 3> image_shape = {3, 240, 336};
   std::vector<torch::executor::Image> images = {
       {.data = image_data, .width = image_shape[2], .height = image_shape[1]}};
 #else //  LLAVA_NO_TORCH_DUMMY_IMAGE
   //   cv::Mat image = cv::imread(image_path, cv::IMREAD_COLOR);
   //   int longest_edge = std::max(image.rows, image.cols);
   //   float scale_factor = 336.0f / longest_edge;
   //   cv::Size new_size(image.cols * scale_factor, image.rows * scale_factor);
   //   cv::Mat resized_image;
   //   cv::resize(image, resized_image, new_size);
   //   image_data.assign(resized_image.datastart, resized_image.dataend);
   torch::Tensor image_tensor;
   torch::load(image_tensor, image_path); // CHW
   ET_LOG(
       Info,
       "image size(0): %" PRId64 ", size(1): %" PRId64 ", size(2): %" PRId64,
       image_tensor.size(0),
       image_tensor.size(1),
       image_tensor.size(2));
   image_data.assign(
       image_tensor.data_ptr<uint8_t>(),
       image_tensor.data_ptr<uint8_t>() + image_tensor.numel());
   std::vector<torch::executor::Image> images = {
       {.data = image_data,
        .width = static_cast<int32_t>(image_tensor.size(2)),
        .height = static_cast<int32_t>(image_tensor.size(1))}};
 #endif // LLAVA_NO_TORCH_DUMMY_IMAGE

   // generate
   runner.generate(std::move(images), prompt, seq_len);
   return 0;
 }
	/*
	* Copyright (c) Meta Platforms, Inc. and affiliates.
	* All rights reserved.
	*
	* This source code is licensed under the BSD-style license found in the
	* LICENSE file in the root directory of this source tree.
	*/

	#include <executorch/examples/models/llava/runner/llava_runner.h>
	#include <gflags/gflags.h>
	#ifndef LLAVA_NO_TORCH_DUMMY_IMAGE
	#include <torch/torch.h>
	#else
	#include <algorithm> // std::fill
	#endif

	#if defined(ET_USE_THREADPOOL)
	#include <executorch/extension/threadpool/cpuinfo_utils.h>
	#include <executorch/extension/threadpool/threadpool.h>
	#endif

	// Command-line flags (gflags). Each DEFINE_* below creates a FLAGS_<name>
	// global that main() reads after gflags::ParseCommandLineFlags().

	// Path of the model file; handed to the LlavaRunner constructor in main().
	DEFINE_string(
	model_path,
	"llava.pte",
	"Model serialized in flatbuffer format.");

	// Path of the tokenizer file; handed to the LlavaRunner constructor in main().
	DEFINE_string(tokenizer_path, "tokenizer.bin", "Tokenizer stuff.");

	// Text prompt passed to runner.generate() together with the image.
	DEFINE_string(prompt, "The answer to the ultimate question is", "Prompt.");

	// Image input. main() only reads this flag when LLAVA_NO_TORCH_DUMMY_IMAGE is
	// NOT defined, in which case the file is loaded with torch::load(). The default
	// is the empty string.
	DEFINE_string(
	image_path,
	"",
	"The path to a .pt file, a serialized torch tensor for an image, longest edge resized to 336.");

	// Sampling temperature forwarded to the LlavaRunner constructor.
	// NOTE(review): the flag is a double but the default literal is the float
	// 0.8f, so the stored default is the float-rounded value rather than exactly
	// 0.8 -- confirm whether that matters to the runner.
	DEFINE_double(
	temperature,
	0.8f,
	"Temperature; Default is 0.8f. 0 = greedy argmax sampling (deterministic). Lower temperature = more deterministic");

	// Sequence-length argument passed to runner.generate().
	// NOTE(review): the help text says "Defaults to max_seq_len", yet the default
	// defined here is the constant 1024 -- confirm which statement is intended.
	DEFINE_int32(
	seq_len,
	1024,
	"Total number of tokens to generate (prompt + output). Defaults to max_seq_len. If the number of input tokens + seq_len > max_seq_len, the output will be truncated to max_seq_len tokens.");

	// Thread count for the ExecuTorch threadpool. main() only acts on this flag
	// when ET_USE_THREADPOOL is defined; -1 selects get_num_performant_cores().
	DEFINE_int32(
	cpu_threads,
	-1,
	"Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");

	int32_t main(int32_t argc, char** argv) {
	gflags::ParseCommandLineFlags(&argc, &argv, true);

	// Create a loader to get the data of the program file. There are other
	// DataLoaders that use mmap() or point32_t to data that's already in memory,
	// and users can create their own DataLoaders to load from arbitrary sources.
	const char* model_path = FLAGS_model_path.c_str();

	const char* tokenizer_path = FLAGS_tokenizer_path.c_str();

	const char* prompt = FLAGS_prompt.c_str();

	std::string image_path = FLAGS_image_path;

	double temperature = FLAGS_temperature;

	int32_t seq_len = FLAGS_seq_len;

	int32_t cpu_threads = FLAGS_cpu_threads;

	#if defined(ET_USE_THREADPOOL)
	uint32_t num_performant_cores = cpu_threads == -1
	? torch::executorch::cpuinfo::get_num_performant_cores()
	: static_cast<uint32_t>(cpu_threads);
	ET_LOG(
	Info, "Resetting threadpool with num threads = %d", num_performant_cores);
	if (num_performant_cores > 0) {
	torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
	num_performant_cores);
	}
	#endif
	// create llama runner
	torch::executor::LlavaRunner runner(model_path, tokenizer_path, temperature);

	// read image and resize the longest edge to 336
	std::vector<uint8_t> image_data;

	#ifdef LLAVA_NO_TORCH_DUMMY_IMAGE
	// Work without torch using a random data
	image_data.resize(3 * 240 * 336);
	std::fill(image_data.begin(), image_data.end(), 0); // black
	std::array<int32_t, 3> image_shape = {3, 240, 336};
	std::vector<torch::executor::Image> images = {
	{.data = image_data, .width = image_shape[2], .height = image_shape[1]}};
	#else // LLAVA_NO_TORCH_DUMMY_IMAGE
	// cv::Mat image = cv::imread(image_path, cv::IMREAD_COLOR);
	// int longest_edge = std::max(image.rows, image.cols);
	// float scale_factor = 336.0f / longest_edge;
	// cv::Size new_size(image.cols * scale_factor, image.rows * scale_factor);
	// cv::Mat resized_image;
	// cv::resize(image, resized_image, new_size);
	// image_data.assign(resized_image.datastart, resized_image.dataend);
	torch::Tensor image_tensor;
	torch::load(image_tensor, image_path); // CHW
	ET_LOG(
	Info,
	"image size(0): %" PRId64 ", size(1): %" PRId64 ", size(2): %" PRId64,
	image_tensor.size(0),
	image_tensor.size(1),
	image_tensor.size(2));
	image_data.assign(
	image_tensor.data_ptr<uint8_t>(),
	image_tensor.data_ptr<uint8_t>() + image_tensor.numel());
	std::vector<torch::executor::Image> images = {
	{.data = image_data,
	.width = static_cast<int32_t>(image_tensor.size(2)),
	.height = static_cast<int32_t>(image_tensor.size(1))}};
	#endif // LLAVA_NO_TORCH_DUMMY_IMAGE

	// generate
	runner.generate(std::move(images), prompt, seq_len);
	return 0;
	}