/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdint>
#include <map>
#include <typeindex>
#include <variant>
#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
#include <executorch/kernels/test/TestUtil.h>
#include <executorch/kernels/test/supported_features.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
#include <gtest/gtest.h>
using namespace ::testing;
using exec_aten::MemoryFormat;
using exec_aten::optional;
using exec_aten::ScalarType;
using exec_aten::Tensor;
using torch::executor::testing::TensorFactory;
// To further verify the accuracy of op_to, we test the conversion from
// floating-point types to signed int types using test cases generated
// directly by core PyTorch. The data is randomly generated in [-5, 5].
// clang-format off
typedef std::map<
std::type_index,
std::variant<
std::vector<float>,
std::vector<double>>>
FloatingTypeToDataMap;
typedef std::map<
std::type_index,
std::variant<
std::vector<int64_t>,
std::vector<int32_t>,
std::vector<int16_t>,
std::vector<int8_t>,
std::vector<uint8_t>>>
IntTypeToDataMap;
// clang-format on
class OpToTest : public OperatorTest {
protected:
Tensor& op_to_copy_out(
const Tensor& self,
bool non_blocking,
optional<MemoryFormat> memory_format,
Tensor& out) {
return torch::executor::aten::_to_copy_outf(
context_, self, non_blocking, memory_format, out);
}
// Cast a vector of INPUT_CTYPE to a vector of OUTPUT_CTYPE
template <typename INPUT_CTYPE, typename OUTPUT_CTYPE>
std::vector<OUTPUT_CTYPE> vector_type_cast(std::vector<INPUT_CTYPE> input) {
std::vector<OUTPUT_CTYPE> output(input.size());
std::transform(
input.begin(), input.end(), output.begin(), [](INPUT_CTYPE x) {
return static_cast<OUTPUT_CTYPE>(x);
});
return output;
}
template <typename INPUT_CTYPE, typename OUTPUT_CTYPE>
struct ToTestCase {
const std::vector<int32_t> sizes;
const std::vector<INPUT_CTYPE> data_in;
const std::vector<OUTPUT_CTYPE> data_out;
};
// Each test uses a different combination of input and output types, so it
// would be messy to create templated test cases with custom data types for
// both the input and the output data.
// Instead, all test case data is stored as double and cast to the desired
// type when passed to the tf.make function. Based on our experiments, core
// PyTorch's type cast matches C++ static_cast within the representable
// range, so using static_cast to generate the ground truth is reasonable.
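// For example, static_cast<int32_t>(3.27) == 3 and
// static_cast<int32_t>(-2.55) == -2; PyTorch's Tensor.to() performs the same
// truncation toward zero for values representable in the target type.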
template <
typename INPUT_CTYPE,
ScalarType INPUT_DTYPE,
typename OUTPUT_CTYPE,
ScalarType OUTPUT_DTYPE>
void test_runner_static_cast(
std::vector<ToTestCase<double, double>> test_cases) {
TensorFactory<INPUT_DTYPE> tf_in;
TensorFactory<OUTPUT_DTYPE> tf_out;
for (auto test_case : test_cases) {
auto data_in = vector_type_cast<double, INPUT_CTYPE>(test_case.data_in);
auto data_out = vector_type_cast<INPUT_CTYPE, OUTPUT_CTYPE>(data_in);
Tensor input = tf_in.make(test_case.sizes, data_in);
Tensor output = tf_out.zeros_like(input);
Tensor ret = op_to_copy_out(
/*self=*/input,
/*non_blocking=*/false,
exec_aten::MemoryFormat::Contiguous,
output);
Tensor expected = tf_out.make(test_case.sizes, data_out);
// The returned tensor, the out tensor, and the expected tensor should all
// hold the same values.
EXPECT_TENSOR_EQ(ret, output);
EXPECT_TENSOR_EQ(ret, expected);
}
}
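// Cast test_case to INPUT_CTYPE, convert the resulting tensor to Bool with
// op_to_copy_out, and compare the result against data_out.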
template <typename INPUT_CTYPE, ScalarType INPUT_DTYPE>
void test_runner_to_bool(
std::vector<double> test_case,
std::vector<uint8_t> data_out) {
TensorFactory<INPUT_DTYPE> tf_in;
TensorFactory<ScalarType::Bool> tf_out;
auto data_in = vector_type_cast<double, INPUT_CTYPE>(test_case);
Tensor input = tf_in.make({(int)test_case.size()}, data_in);
Tensor output = tf_out.zeros_like(input);
Tensor ret = op_to_copy_out(
/*self=*/input,
/*non_blocking=*/false,
exec_aten::MemoryFormat::Contiguous,
output);
Tensor expected = tf_out.make({(int)data_out.size()}, data_out);
// The return value of op_to_copy_out and the values written to output
// should be the same.
EXPECT_TENSOR_EQ(ret, output);
// The return value of op_to_copy_out and the values in expected which are
// the reference values should be the same.
EXPECT_TENSOR_EQ(ret, expected);
}
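// Build a Bool input from test_case, convert it to OUT_DTYPE with
// op_to_copy_out, and compare the result against out cast to OUT_CTYPE.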
template <typename OUT_CTYPE, ScalarType OUT_DTYPE>
void test_runner_from_bool(
std::vector<uint8_t> test_case,
std::vector<double> out) {
TensorFactory<ScalarType::Bool> tf_in;
TensorFactory<OUT_DTYPE> tf_out;
auto data_out = vector_type_cast<double, OUT_CTYPE>(out);
Tensor input = tf_in.make({(int)test_case.size()}, test_case);
Tensor output = tf_out.zeros_like(input);
Tensor ret = op_to_copy_out(
/*self=*/input,
/*non_blocking=*/false,
exec_aten::MemoryFormat::Contiguous,
output);
Tensor expected = tf_out.make({(int)data_out.size()}, data_out);
// The return value of op_to_copy_out and the values written to output
// should be the same.
EXPECT_TENSOR_EQ(ret, output);
// The return value of op_to_copy_out and the values in expected which are
// the reference values should be the same.
EXPECT_TENSOR_EQ(ret, expected);
}
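// Run one hardcoded float-to-int conversion case: the input data is looked
// up in floating_point_data by INPUT_CTYPE, and the expected output in
// int_data by OUTPUT_CTYPE.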
template <
typename INPUT_CTYPE,
ScalarType INPUT_DTYPE,
typename OUTPUT_CTYPE,
ScalarType OUTPUT_DTYPE>
void test_runner_hardcode_data(
FloatingTypeToDataMap floating_point_data,
IntTypeToDataMap int_data) {
TensorFactory<INPUT_DTYPE> tf_in;
TensorFactory<OUTPUT_DTYPE> tf_out;
if (typeid(OUTPUT_CTYPE) == typeid(uint8_t)) {
// Skip unsigned outputs: the hardcoded input data contains negative values,
// which would underflow a uint8_t.
return;
}
ToTestCase<INPUT_CTYPE, OUTPUT_CTYPE> test_case = {
/*sizes=*/{3, 5}, /*data_in=*/
std::get<std::vector<INPUT_CTYPE>>(
floating_point_data[typeid(INPUT_CTYPE)]),
/*data_out=*/
std::get<std::vector<OUTPUT_CTYPE>>(int_data[typeid(OUTPUT_CTYPE)])};
Tensor input = tf_in.make(test_case.sizes, test_case.data_in);
Tensor output = tf_out.zeros_like(input);
Tensor ret = op_to_copy_out(
/*self=*/input,
/*non_blocking=*/false,
exec_aten::MemoryFormat::Contiguous,
output);
Tensor expected = tf_out.make(test_case.sizes, test_case.data_out);
// The returned tensor, the out tensor, and the expected tensor should all
// hold the same values.
EXPECT_TENSOR_EQ(ret, output);
EXPECT_TENSOR_EQ(ret, expected);
}
/* %python
import torch
torch.manual_seed(0)
x = torch.rand(2, 3)
res = x.to(non_blocking = False, memory_format = torch.preserve_format)
op = "op_to_copy_out"
opt_setup_params = """
bool non_blocking = false;
optional<MemoryFormat> memory_format;
"""
opt_extra_params = "non_blocking, memory_format,"
out_args = "out_shape, dynamism"
dtype = "ScalarType::Float"
check = "EXPECT_TENSOR_EQ" */
void test_dynamic_shape(
const std::vector<int32_t>& out_shape,
enum torch::executor::TensorShapeDynamism dynamism) {
/* %python
%rewrite(unary_op) */
TensorFactory<ScalarType::Float> tf;
Tensor x = tf.make(
{2, 3},
{0.49625658988952637,
0.7682217955589294,
0.08847743272781372,
0.13203048706054688,
0.30742281675338745,
0.6340786814689636});
Tensor expected = tf.make(
{2, 3},
{0.49625658988952637,
0.7682217955589294,
0.08847743272781372,
0.13203048706054688,
0.30742281675338745,
0.6340786814689636});
bool non_blocking = false;
optional<MemoryFormat> memory_format;
Tensor out = tf.zeros(out_shape, dynamism);
op_to_copy_out(x, non_blocking, memory_format, out);
EXPECT_TENSOR_EQ(out, expected);
}
};
/* For now we deliberately do not implement or test the behavior of casting a
 * number that cannot be represented in the target type (e.g. inf to int32_t,
 * nan to int64_t, or 2147483648 to int32_t), because
 * - a. The result of such a cast is undefined according to the C++ standard;
 * - b. No explicit rules for such conversions can be found in core PyTorch
 *      (it does not behave like static_cast or any other C++ cast);
 * - c. If a user casts an unrepresentable value to a given type, they do so
 *      at their own risk;
 * - d. Even though we could use if/switch statements to cover these boundary
 *      cases, the code would be lengthy and jumbled. Writing such cluttered
 *      code to pin down undefined behavior is not worthwhile, and we cannot
 *      cover all such cases anyway.
 */
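// For example, static_cast<int32_t>(INFINITY) and
// static_cast<int32_t>(2147483648.0) are undefined behavior in C++, since
// the truncated value cannot be represented as an int32_t.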
namespace {} // namespace
// Regular test for to_copy.out
// Verify that to_copy.out works across all supported dtype pairs
TEST_F(OpToTest, AllDtypesSupported) {
std::vector<ToTestCase<double, double>> test_cases = {
{
/*sizes=*/{2, 4}, /*data_in=*/
{2.11, 3.2, 2.3, 4.0, 1.1, 5.2, 1.1, 6.3}, /*data_out=*/
{}, // data_out shouldn't be used in test_runner_static_cast
},
{
/*sizes=*/{3, 4, 0, 5},
/*data_in=*/{},
/*data_out=*/{},
},
{
/*sizes=*/{},
/*data_in=*/{10.0},
/*data_out=*/{}, // data_out shouldn't be used in
// test_runner_static_cast
},
};
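  // TEST_KERNEL runs test_runner_static_cast for one (input, output) dtype
  // pair; TEST_ENTRY expands TEST_KERNEL over every real output dtype for a
  // fixed input dtype, so ET_FORALL_REAL_TYPES(TEST_ENTRY) covers every pair
  // of real dtypes.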
#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
test_runner_static_cast< \
INPUT_CTYPE, \
ScalarType::INPUT_DTYPE, \
OUTPUT_CTYPE, \
ScalarType::OUTPUT_DTYPE>(test_cases);
#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
ET_FORALL_REAL_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);
ET_FORALL_REAL_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
#undef TEST_KERNEL
}
TEST_F(OpToTest, BoolTests) {
std::vector<double> test_case_to_bool = {1.1, 2.2, 0};
std::vector<uint8_t> result_to_bool = {true, true, false};
#define TEST_TO_BOOL(INPUT_CTYPE, INPUT_DTYPE) \
test_runner_to_bool<INPUT_CTYPE, ScalarType::INPUT_DTYPE>( \
test_case_to_bool, result_to_bool);
ET_FORALL_REAL_TYPES(TEST_TO_BOOL);
std::vector<uint8_t> test_case_from_bool = {true, true, false};
std::vector<double> result_from_bool = {1.0, 1.0, 0};
#define TEST_FROM_BOOL(OUTPUT_CTYPE, OUTPUT_DTYPE) \
test_runner_from_bool<OUTPUT_CTYPE, ScalarType::OUTPUT_DTYPE>( \
test_case_from_bool, result_from_bool);
ET_FORALL_REAL_TYPES(TEST_FROM_BOOL);
}
TEST_F(OpToTest, NanInfSupported) {
constexpr auto floatInfinity = std::numeric_limits<float>::infinity();
std::vector<ToTestCase<double, double>> test_cases = {{
/*sizes=*/{2, 4},
/*data_in=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6},
/*data_out=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6},
}};
#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
test_runner_static_cast< \
INPUT_CTYPE, \
ScalarType::INPUT_DTYPE, \
OUTPUT_CTYPE, \
ScalarType::OUTPUT_DTYPE>(test_cases);
#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
ET_FORALL_FLOAT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);
ET_FORALL_FLOAT_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
#undef TEST_KERNEL
}
TEST_F(OpToTest, HardcodeFloatConvertInt) {
// Hardcoded input and output data generated by core PyTorch
// clang-format off
std::vector<float> float_data = {
-1.47900056838989257812, -4.59277725219726562500,
2.15365791320800781250, -2.55494546890258789062,
3.06999135017395019531, 3.27460670471191406250,
-3.98865103721618652344, -4.81065988540649414062,
3.67902207374572753906, 3.72226405143737792969,
0.80567771196365356445, 2.23788332939147949219,
-0.52035576105117797852, -1.58493483066558837891,
-0.30919688940048217773};
std::vector<double> double_data = {
-1.47900053955270172068, -4.59277735274143061872,
2.15365796963871947156, -2.55494554556038755422,
3.06999137834642255029, 3.27460679459944969949,
-3.98865109243288795682, -4.81065977167646074975,
3.67902198302105531980, 3.72226414774102742911,
0.80567768667100203572, 2.23788335717029518435,
-0.52035578832931150828, -1.58493480710766210251,
-0.30919688936285893988};
// clang-format on
std::vector<int64_t> int64_data = {
-1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
std::vector<int32_t> int32_data = {
-1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
std::vector<int16_t> int16_data = {
-1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
std::vector<int8_t> int8_data = {
-1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0};
// Gather all floating-point data together for easier traversal
FloatingTypeToDataMap floating_point_data;
floating_point_data[typeid(float)] = float_data;
floating_point_data[typeid(double)] = double_data;
// Gather all int data together for easier traversal
IntTypeToDataMap int_data;
int_data[typeid(int64_t)] = int64_data;
int_data[typeid(int32_t)] = int32_data;
int_data[typeid(int16_t)] = int16_data;
int_data[typeid(int8_t)] = int8_data;
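  // Instantiate the runner for every (floating-point input, int output) dtype
  // pair: ET_FORALL_FLOAT_TYPES iterates over the input dtypes and
  // ET_FORALL_INT_TYPES_WITH2 iterates over the output dtypes for each input.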
#define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \
test_runner_hardcode_data< \
INPUT_CTYPE, \
ScalarType::INPUT_DTYPE, \
OUTPUT_CTYPE, \
ScalarType::OUTPUT_DTYPE>(floating_point_data, int_data);
#define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \
ET_FORALL_INT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL);
ET_FORALL_FLOAT_TYPES(TEST_ENTRY);
}
TEST_F(OpToTest, MismatchedSizesDie) {
if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
GTEST_SKIP() << "ATen kernel can handle mismatched sizes";
}
TensorFactory<ScalarType::Int> tf;
Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
Tensor out = tf.zeros({3, 2, 1, 1});
ET_EXPECT_KERNEL_FAILURE(
context_,
op_to_copy_out(
input,
/*non_blocking=*/false,
exec_aten::MemoryFormat::Contiguous,
out));
}
// Only the contiguous memory format is supported; any other memory format
// should be rejected. The kernel is expected to fail when an unsupported
// memory format is passed.
TEST_F(OpToTest, MismatchedMemoryFormatDies) {
if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
GTEST_SKIP() << "ATen kernel can handle non contiguous memory formats";
}
TensorFactory<ScalarType::Float> tf_in;
TensorFactory<ScalarType::Float> tf_out;
Tensor input =
tf_in.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
Tensor out = tf_out.zeros({3, 1, 1, 2});
ET_EXPECT_KERNEL_FAILURE(
context_,
op_to_copy_out(
input,
/*non_blocking=*/false,
static_cast<exec_aten::MemoryFormat>(55),
out));
// memory format can be null
EXPECT_TENSOR_EQ(
op_to_copy_out(
input,
/*non_blocking=*/false,
/*memory_format=*/exec_aten::nullopt,
out),
input);
}
// Only blocking data transfer is supported
TEST_F(OpToTest, MismatchedBlockingDie) {
if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
GTEST_SKIP() << "ATen kernel can handle non blocking data transfer";
}
TensorFactory<ScalarType::Int> tf;
Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6});
Tensor out = tf.zeros(/*sizes=*/{3, 1, 1, 2});
ET_EXPECT_KERNEL_FAILURE(
context_,
op_to_copy_out(
input,
/*non_blocking=*/true,
exec_aten::MemoryFormat::Contiguous,
out));
}
TEST_F(OpToTest, DynamicShapeUpperBoundSameAsExpected) {
test_dynamic_shape(
{2, 3}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND);
}
TEST_F(OpToTest, DynamicShapeUpperBoundLargerThanExpected) {
test_dynamic_shape(
{10, 10}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND);
}
TEST_F(OpToTest, DynamicShapeUnbound) {
if (!torch::executor::testing::SupportedFeatures::get()->output_resize) {
GTEST_SKIP() << "Dynamic shape unbound not supported";
}
test_dynamic_shape(
{1, 1}, torch::executor::TensorShapeDynamism::DYNAMIC_UNBOUND);
}