Add PACK/UNPACK/SPLIT operators to TFLite Micro.
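
PACK stacks same-shaped input tensors into a single tensor of one higher
rank along a new axis, UNPACK is its inverse, and SPLIT slices a tensor
into num_splits equal pieces along a constant axis.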

PiperOrigin-RevId: 263247993
diff --git a/tensorflow/lite/experimental/micro/kernels/BUILD b/tensorflow/lite/experimental/micro/kernels/BUILD
index d236b3a..dc2ebe7 100644
--- a/tensorflow/lite/experimental/micro/kernels/BUILD
+++ b/tensorflow/lite/experimental/micro/kernels/BUILD
@@ -24,12 +24,15 @@
         "fully_connected.cc",
         "logical.cc",
         "maximum_minimum.cc",
+        "pack.cc",
         "pooling.cc",
         "prelu.cc",
         "reshape.cc",
         "round.cc",
         "softmax.cc",
+        "split.cc",
         "strided_slice.cc",
+        "unpack.cc",
     ],
     hdrs = [
     ],
@@ -75,13 +78,16 @@
         "fully_connected.cc",
         "logical.cc",
         "maximum_minimum.cc",
+        "pack.cc",
         "pooling.cc",
         "portable_optimized/depthwise_conv.cc",
         "prelu.cc",
         "reshape.cc",
         "round.cc",
         "softmax.cc",
+        "split.cc",
         "strided_slice.cc",
+        "unpack.cc",
     ],
     hdrs = [
     ],
@@ -323,6 +329,45 @@
     ],
 )
 
+tflite_micro_cc_test(
+    name = "pack_test",
+    srcs = [
+        "pack_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "unpack_test",
+    srcs = [
+        "unpack_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
+tflite_micro_cc_test(
+    name = "split_test",
+    srcs = [
+        "split_test.cc",
+    ],
+    deps = [
+        ":all_ops_resolver",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/experimental/micro:micro_framework",
+        "//tensorflow/lite/experimental/micro/testing:micro_test",
+    ],
+)
+
 cc_library(
     name = "micro_utils",
     hdrs = ["micro_utils.h"],
diff --git a/tensorflow/lite/experimental/micro/kernels/all_ops_resolver.cc b/tensorflow/lite/experimental/micro/kernels/all_ops_resolver.cc
index 334e4a7..2e0a21f 100644
--- a/tensorflow/lite/experimental/micro/kernels/all_ops_resolver.cc
+++ b/tensorflow/lite/experimental/micro/kernels/all_ops_resolver.cc
@@ -48,6 +48,9 @@
 TfLiteRegistration* Register_CEIL();
 TfLiteRegistration* Register_ROUND();
 TfLiteRegistration* Register_STRIDED_SLICE();
+TfLiteRegistration* Register_PACK();
+TfLiteRegistration* Register_SPLIT();
+TfLiteRegistration* Register_UNPACK();
 
 AllOpsResolver::AllOpsResolver() {
   AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D());
@@ -84,6 +87,11 @@
   AddBuiltin(BuiltinOperator_CEIL, Register_CEIL());
   AddBuiltin(BuiltinOperator_ROUND, Register_ROUND());
   AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE());
+  AddBuiltin(BuiltinOperator_PACK, Register_PACK());
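+  // SPLIT has multiple schema versions; register the kernel for all of them
+  // so models exported by newer converters still resolve.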
+  AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(),
+             /* min_version */ 1,
+             /* max_version */ 3);
+  AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK());
 }
 
 }  // namespace micro
diff --git a/tensorflow/lite/experimental/micro/kernels/pack.cc b/tensorflow/lite/experimental/micro/kernels/pack.cc
new file mode 100644
index 0000000..091f81f
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/kernels/pack.cc
@@ -0,0 +1,122 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace pack {
+namespace {
+
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+template <typename T>
+TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node,
+                      TfLiteTensor* output, int values_count, int axis) {
+  const int dimensions = output->dims->size;
+  const TfLiteTensor* input0 = &context->tensors[node->inputs->data[0]];
+  const TfLiteIntArray* input_dims = input0->dims;
+  const TfLiteIntArray* output_dims = output->dims;
+
+  if (axis < 0) {
+    axis += dimensions;
+  }
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    outer_size *= output_dims->data[i];
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; ++i) {
+    copy_size *= output_dims->data[i];
+  }
+  int input_size = 1;
+  for (int i = 0; i < input_dims->size; ++i) {
+    input_size *= input_dims->data[i];
+  }
+  TFLITE_DCHECK_EQ(input_size, copy_size * outer_size);
+
+  T* output_data = GetTensorData<T>(output);
+
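+  // For outer index k, input i contributes one copy_size-sized chunk at
+  // output offset (k * values_count + i) * copy_size.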
+  for (int i = 0; i < values_count; ++i) {
+    TfLiteTensor* t = &context->tensors[node->inputs->data[i]];
+    const T* input_data = GetTensorData<T>(t);
+    for (int k = 0; k < outer_size; ++k) {
+      const T* input_ptr = input_data + copy_size * k;
+      int loc = k * values_count * copy_size + i * copy_size;
+      T* output_ptr = output_data + loc;
+      for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
+    }
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLitePackParams* data =
+      reinterpret_cast<TfLitePackParams*>(node->builtin_data);
+
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
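+  // All inputs share the output's type, so dispatch on the output type.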
+  switch (output->type) {
+    case kTfLiteFloat32: {
+      return PackImpl<float>(context, node, output, data->values_count,
+                             data->axis);
+    }
+    case kTfLiteUInt8: {
+      return PackImpl<uint8_t>(context, node, output, data->values_count,
+                               data->axis);
+    }
+    case kTfLiteInt8: {
+      return PackImpl<int8_t>(context, node, output, data->values_count,
+                              data->axis);
+    }
+    case kTfLiteInt32: {
+      return PackImpl<int32_t>(context, node, output, data->values_count,
+                               data->axis);
+    }
+    case kTfLiteInt64: {
+      return PackImpl<int64_t>(context, node, output, data->values_count,
+                               data->axis);
+    }
+    default: {
+      context->ReportError(context, "Type '%s' is not supported by pack.",
+                           TfLiteTypeGetName(output->type));
+      return kTfLiteError;
+    }
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace
+}  // namespace pack
+
+TfLiteRegistration* Register_PACK() {
+  static TfLiteRegistration r = {nullptr, nullptr, pack::Prepare, pack::Eval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/experimental/micro/kernels/pack_test.cc b/tensorflow/lite/experimental/micro/kernels/pack_test.cc
new file mode 100644
index 0000000..cbbe86f
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/kernels/pack_test.cc
@@ -0,0 +1,420 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/debug_log.h"
+#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+#include "tensorflow/lite/experimental/micro/testing/test_utils.h"
+
+namespace tflite {
+namespace testing {
+
+void TestPackTwoInputsFloat(std::initializer_list<int> input1_dims_data,
+                            std::initializer_list<float> input1_data,
+                            std::initializer_list<int> input2_dims_data,
+                            std::initializer_list<float> input2_data, int axis,
+                            std::initializer_list<int> output_dims_data,
+                            std::initializer_list<float> expected_output_data,
+                            float* output_data) {
+  TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data);
+  TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int input_size = 2;
+  constexpr int output_size = 1;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input1_data, input1_dims, "input1_tensor"),
+      CreateFloatTensor(input2_data, input2_dims, "input2_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor")};
+
+  // Fill the output buffer with a sentinel value so elements the op fails
+  // to write are detectable.
+  for (int i = 0; i < output_dims_count; ++i) {
+    output_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_PACK, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLitePackParams builtin_data = {
+      .values_count = 2,
+      .axis = axis,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
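+  // TfLiteIntArray initializers are length-prefixed: {2, 0, 1} declares two
+  // node inputs (tensors 0 and 1) and {1, 2} one output (tensor 2).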
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({1, 2});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestPackThreeInputsFloat(std::initializer_list<int> input1_dims_data,
+                              std::initializer_list<float> input1_data,
+                              std::initializer_list<int> input2_dims_data,
+                              std::initializer_list<float> input2_data,
+                              std::initializer_list<int> input3_dims_data,
+                              std::initializer_list<float> input3_data,
+                              int axis,
+                              std::initializer_list<int> output_dims_data,
+                              std::initializer_list<float> expected_output_data,
+                              float* output_data) {
+  TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data);
+  TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data);
+  TfLiteIntArray* input3_dims = IntArrayFromInitializer(input3_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int input_size = 3;
+  constexpr int output_size = 1;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input1_data, input1_dims, "input1_tensor"),
+      CreateFloatTensor(input2_data, input2_dims, "input2_tensor"),
+      CreateFloatTensor(input3_data, input3_dims, "input3_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor")};
+
+  // Fill the output buffer with a sentinel value so elements the op fails
+  // to write are detectable.
+  for (int i = 0; i < output_dims_count; ++i) {
+    output_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_PACK, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLitePackParams builtin_data = {
+      .values_count = 3,
+      .axis = axis,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({3, 0, 1, 2});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({1, 3});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestPackTwoInputsQuantized(
+    std::initializer_list<int> input1_dims_data,
+    std::initializer_list<uint8_t> input1_data,
+    std::initializer_list<int> input2_dims_data,
+    std::initializer_list<uint8_t> input2_data, int axis,
+    std::initializer_list<int> output_dims_data,
+    std::initializer_list<uint8_t> expected_output_data, uint8_t* output_data) {
+  TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data);
+  TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int input_size = 2;
+  constexpr int output_size = 1;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      // CreateQuantizedTensor needs min/max values, but they don't affect
+      // PACK's behavior, so arbitrarily use 0 and 10.
+      CreateQuantizedTensor(input1_data, input1_dims, "input1_tensor", 0, 10),
+      CreateQuantizedTensor(input2_data, input2_dims, "input2_tensor", 0, 10),
+      CreateQuantizedTensor(output_data, output_dims, "output_tensor", 0, 10)};
+
+  // Fill the output buffer with a sentinel value so elements the op fails
+  // to write are detectable.
+  for (int i = 0; i < output_dims_count; ++i) {
+    output_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_PACK, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLitePackParams builtin_data = {
+      .values_count = 2,
+      .axis = axis,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({1, 2});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]);
+  }
+}
+
+void TestPackTwoInputsQuantized32(
+    std::initializer_list<int> input1_dims_data,
+    std::initializer_list<int32_t> input1_data,
+    std::initializer_list<int> input2_dims_data,
+    std::initializer_list<int32_t> input2_data, int axis,
+    std::initializer_list<int> output_dims_data,
+    std::initializer_list<int32_t> expected_output_data, int32_t* output_data) {
+  TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data);
+  TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int input_size = 2;
+  constexpr int output_size = 1;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      // CreateQuantized32Tensor needs min/max values, but they don't affect
+      // PACK's behavior, so arbitrarily use 0 and 10.
+      CreateQuantized32Tensor(input1_data, input1_dims, "input1_tensor", 0, 10),
+      CreateQuantized32Tensor(input2_data, input2_dims, "input2_tensor", 0, 10),
+      CreateQuantized32Tensor(output_data, output_dims, "output_tensor", 0,
+                              10)};
+
+  // Fill the output buffer with a sentinel value so elements the op fails
+  // to write are detectable.
+  for (int i = 0; i < output_dims_count; ++i) {
+    output_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_PACK, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLitePackParams builtin_data = {
+      .values_count = 2,
+      .axis = axis,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({1, 2});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]);
+  }
+}
+
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(PackFloatThreeInputs) {
+  constexpr int output_dims_count = 6;
+  float output_data[output_dims_count];
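+  // Shape initializer lists are length-prefixed: {1, 2} describes a rank-1
+  // tensor with 2 elements, {2, 3, 2} a rank-2 tensor of shape 3x2.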
+  tflite::testing::TestPackThreeInputsFloat(
+      {1, 2},        // Input1 shape
+      {1, 4},        // Input1 values
+      {1, 2},        // Input2 shape
+      {2, 5},        // Input2 values
+      {1, 2},        // Input3 shape
+      {3, 6},        // Input3 values
+      0, {2, 3, 2},  // Output shape
+      {
+          1, 4, 2, 5, 3, 6  // Output values
+      },
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(PackFloatThreeInputsDifferentAxis) {
+  constexpr int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestPackThreeInputsFloat(
+      {1, 2},        // Input1 shape
+      {1, 4},        // Input1 values
+      {1, 2},        // Input2 shape
+      {2, 5},        // Input2 values
+      {1, 2},        // Input3 shape
+      {3, 6},        // Input3 values
+      1, {2, 2, 3},  // Output shape
+      {
+          1, 2, 3, 4, 5, 6  // Output values
+      },
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(PackFloatThreeInputsNegativeAxis) {
+  constexpr int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestPackThreeInputsFloat(
+      {1, 2},         // Input1 shape
+      {1, 4},         // Input1 values
+      {1, 2},         // Input2 shape
+      {2, 5},         // Input2 values
+      {1, 2},         // Input3 shape
+      {3, 6},         // Input3 values
+      -1, {2, 2, 3},  // Output shape
+      {
+          1, 2, 3, 4, 5, 6  // Output values
+      },
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(PackFloatMultiDimensions) {
+  constexpr int output_dims_count = 12;
+  float output_data[output_dims_count];
+  tflite::testing::TestPackTwoInputsFloat(
+      {2, 2, 3},              // Input1 shape
+      {1, 2, 3, 4, 5, 6},     // Input1 values
+      {2, 2, 3},              // Input2 shape
+      {7, 8, 9, 10, 11, 12},  // Input2 values
+      1, {3, 2, 2, 3},        // Output shape
+      {
+          1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12  // Output values
+      },
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(PackQuantizedMultiDimensions) {
+  constexpr int output_dims_count = 12;
+  uint8_t output_data[output_dims_count];
+  tflite::testing::TestPackTwoInputsQuantized(
+      {2, 2, 3},              // Input1 shape
+      {1, 2, 3, 4, 5, 6},     // Input1 values
+      {2, 2, 3},              // Input2 shape
+      {7, 8, 9, 10, 11, 12},  // Input2 values
+      1, {3, 2, 2, 3},        // Output shape
+      {
+          1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12  // Output values
+      },
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(PackQuantized32MultiDimensions) {
+  constexpr int output_dims_count = 12;
+  int32_t output_data[output_dims_count];
+  tflite::testing::TestPackTwoInputsQuantized32(
+      {2, 2, 3},              // Input1 shape
+      {1, 2, 3, 4, 5, 6},     // Input1 values
+      {2, 2, 3},              // Input2 shape
+      {7, 8, 9, 10, 11, 12},  // Input2 values
+      1, {3, 2, 2, 3},        // Output shape
+      {
+          1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12  // Output values
+      },
+      output_data);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/kernels/split.cc b/tensorflow/lite/experimental/micro/kernels/split.cc
new file mode 100644
index 0000000..dc0fe3c
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/kernels/split.cc
@@ -0,0 +1,125 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace split {
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+template <typename T>
+TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
+                       const TfLiteTensor* input, int axis_value) {
+  const int output_count = NumOutputs(node);
+  const TfLiteIntArray* input_dims = input->dims;
+  const TfLiteTensor* output0 = &context->tensors[node->outputs->data[0]];
+  const TfLiteIntArray* output_dims = output0->dims;
+
+  const int split_dimensions = input_dims->size;
+  int axis = axis_value < 0 ? axis_value + split_dimensions : axis_value;
+
+  TFLITE_DCHECK_LT(axis, split_dimensions);
+  TFLITE_DCHECK_EQ(output_dims->size, split_dimensions);
+
+  int64_t split_size = output_dims->data[axis] * output_count;
+
+  TFLITE_DCHECK_EQ(split_size, input_dims->data[axis]);
+  int64_t outer_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    outer_size *= input_dims->data[i];
+  }
+
+  int64_t base_inner_size = 1;
+  for (int i = axis + 1; i < split_dimensions; ++i) {
+    base_inner_size *= input_dims->data[i];
+  }
+
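+  // Walk the input once: for each outer index, hand one contiguous
+  // copy_size-sized slice to each output in turn.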
+  const T* input_ptr = GetTensorData<T>(input);
+  for (int k = 0; k < outer_size; ++k) {
+    for (int i = 0; i < output_count; ++i) {
+      TfLiteTensor* t = &context->tensors[node->outputs->data[i]];
+      T* output_data = GetTensorData<T>(t);
+      const int copy_size = output_dims->data[axis] * base_inner_size;
+      T* output_ptr = output_data + k * copy_size;
+      for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
+      input_ptr += copy_size;
+    }
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor* axis = GetInput(context, node, 0);
+  const TfLiteTensor* input = GetInput(context, node, 1);
+
+  // Dynamic output tensors would be needed if the axis tensor were not
+  // constant, but Micro doesn't support dynamic memory allocation, so only
+  // constant axis tensors are supported for now.
+  TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis),
+                     "Non-constant axis tensor not supported");
+
+  int axis_value = GetTensorData<int32_t>(axis)[0];
+  if (axis_value < 0) {
+    axis_value += NumDimensions(input);
+  }
+
+  TF_LITE_ENSURE(context, axis_value >= 0);
+  TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      return SplitImpl<float>(context, node, input, axis_value);
+    }
+    case kTfLiteUInt8: {
+      return SplitImpl<uint8_t>(context, node, input, axis_value);
+    }
+    case kTfLiteInt8: {
+      return SplitImpl<int8_t>(context, node, input, axis_value);
+    }
+    case kTfLiteInt16: {
+      return SplitImpl<int16_t>(context, node, input, axis_value);
+    }
+    case kTfLiteInt32: {
+      return SplitImpl<int32_t>(context, node, input, axis_value);
+    }
+    default:
+      context->ReportError(context, "Type '%s' is not supported by split.",
+                           TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace split
+
+TfLiteRegistration* Register_SPLIT() {
+  static TfLiteRegistration r = {nullptr, nullptr, split::Prepare, split::Eval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/experimental/micro/kernels/split_test.cc b/tensorflow/lite/experimental/micro/kernels/split_test.cc
new file mode 100644
index 0000000..1ee7a69
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/kernels/split_test.cc
@@ -0,0 +1,571 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/debug_log.h"
+#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+#include "tensorflow/lite/experimental/micro/testing/test_utils.h"
+
+namespace tflite {
+namespace testing {
+
+void TestSplitTwoOutputsFloat(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<float> input_data,
+    std::initializer_list<int> axis_dims_data,
+    std::initializer_list<int32_t> axis_data,
+    std::initializer_list<int> output1_dims_data,
+    std::initializer_list<float> expected_output1_data,
+    std::initializer_list<int> output2_dims_data,
+    std::initializer_list<float> expected_output2_data, float* output1_data,
+    float* output2_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* axis_dims = IntArrayFromInitializer(axis_dims_data);
+  TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data);
+  TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data);
+  const int output1_dims_count = ElementCount(*output1_dims);
+  const int output2_dims_count = ElementCount(*output2_dims);
+
+  constexpr int input_size = 1;
+  constexpr int output_size = 2;
+  constexpr int axis_size = 1;
+  constexpr int tensors_size = input_size + output_size + axis_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantized32Tensor(axis_data, axis_dims, "axis_tensor", 0, 5),
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(output1_data, output1_dims, "output1_tensor"),
+      CreateFloatTensor(output2_data, output2_dims, "output2_tensor")};
+
+  // Only a constant axis tensor is supported, so mark it read-only.
+  tensors[0].allocation_type = kTfLiteMmapRo;
+
+  // Fill the output buffers with a sentinel value so elements the op fails
+  // to write are detectable.
+  for (int i = 0; i < output1_dims_count; ++i) {
+    output1_data[i] = 23;
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    output2_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_SPLIT, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteSplitParams builtin_data = {
+      .num_splits = 2,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
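+  // SPLIT's node inputs are the axis tensor (0) followed by the input
+  // tensor (1); the two outputs are tensors 2 and 3.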
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({2, 2, 3});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output1_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data.begin()[i], output1_data[i],
+                              1e-5f);
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data.begin()[i], output2_data[i],
+                              1e-5f);
+  }
+}
+
+void TestSplitFourOutputsFloat(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<float> input_data,
+    std::initializer_list<int> axis_dims_data,
+    std::initializer_list<int32_t> axis_data,
+    std::initializer_list<int> output1_dims_data,
+    std::initializer_list<float> expected_output1_data,
+    std::initializer_list<int> output2_dims_data,
+    std::initializer_list<float> expected_output2_data,
+    std::initializer_list<int> output3_dims_data,
+    std::initializer_list<float> expected_output3_data,
+    std::initializer_list<int> output4_dims_data,
+    std::initializer_list<float> expected_output4_data, float* output1_data,
+    float* output2_data, float* output3_data, float* output4_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* axis_dims = IntArrayFromInitializer(axis_dims_data);
+  TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data);
+  TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data);
+  TfLiteIntArray* output3_dims = IntArrayFromInitializer(output3_dims_data);
+  TfLiteIntArray* output4_dims = IntArrayFromInitializer(output4_dims_data);
+  const int output1_dims_count = ElementCount(*output1_dims);
+  const int output2_dims_count = ElementCount(*output2_dims);
+  const int output3_dims_count = ElementCount(*output3_dims);
+  const int output4_dims_count = ElementCount(*output4_dims);
+
+  constexpr int input_size = 1;
+  constexpr int output_size = 4;
+  constexpr int axis_size = 1;
+  constexpr int tensors_size = input_size + output_size + axis_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantized32Tensor(axis_data, axis_dims, "axis_tensor", 0, 5),
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(output1_data, output1_dims, "output1_tensor"),
+      CreateFloatTensor(output2_data, output2_dims, "output2_tensor"),
+      CreateFloatTensor(output3_data, output3_dims, "output3_tensor"),
+      CreateFloatTensor(output4_data, output4_dims, "output4_tensor")};
+
+  // Only a constant axis tensor is supported, so mark it read-only.
+  tensors[0].allocation_type = kTfLiteMmapRo;
+
+  // Fill the output buffers with a sentinel value so elements the op fails
+  // to write are detectable.
+  for (int i = 0; i < output1_dims_count; ++i) {
+    output1_data[i] = 23;
+  }
+  for (int i = 0; i < output2_dims_count; ++i) {
+    output2_data[i] = 23;
+  }
+  for (int i = 0; i < output3_dims_count; ++i) {
+    output3_data[i] = 23;
+  }
+  for (int i = 0; i < output4_dims_count; ++i) {
+    output4_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_SPLIT, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteSplitParams builtin_data = {
+      .num_splits = 4,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({4, 2, 3, 4, 5});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output1_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data.begin()[i], output1_data[i],
+                              1e-5f);
+  }
+  for (int i = 0; i < output2_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data.begin()[i], output2_data[i],
+                              1e-5f);
+  }
+  for (int i = 0; i < output3_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output3_data.begin()[i], output3_data[i],
+                              1e-5f);
+  }
+  for (int i = 0; i < output4_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output4_data.begin()[i], output4_data[i],
+                              1e-5f);
+  }
+}
+
+void TestSplitTwoOutputsQuantized(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<uint8_t> input_data,
+    std::initializer_list<int> axis_dims_data,
+    std::initializer_list<int32_t> axis_data,
+    std::initializer_list<int> output1_dims_data,
+    std::initializer_list<uint8_t> expected_output1_data,
+    std::initializer_list<int> output2_dims_data,
+    std::initializer_list<uint8_t> expected_output2_data, uint8_t* output1_data,
+    uint8_t* output2_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* axis_dims = IntArrayFromInitializer(axis_dims_data);
+  TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data);
+  TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data);
+  const int output1_dims_count = ElementCount(*output1_dims);
+  const int output2_dims_count = ElementCount(*output2_dims);
+
+  constexpr int input_size = 1;
+  constexpr int output_size = 2;
+  constexpr int axis_size = 1;
+  constexpr int tensors_size = input_size + output_size + axis_size;
+  TfLiteTensor tensors[tensors_size] = {
+      // The quantized tensor helpers need min/max values, but they don't
+      // affect SPLIT's behavior, so arbitrarily use 0 and 10.
+      CreateQuantized32Tensor(axis_data, axis_dims, "axis_tensor", 0, 10),
+      CreateQuantizedTensor(input_data, input_dims, "input_tensor", 0, 10),
+      CreateQuantizedTensor(output1_data, output1_dims, "output1_tensor", 0,
+                            10),
+      CreateQuantizedTensor(output2_data, output2_dims, "output2_tensor", 0,
+                            10)};
+
+  // Only a constant axis tensor is supported, so mark it read-only.
+  tensors[0].allocation_type = kTfLiteMmapRo;
+
+  // Fill the output buffers with a sentinel value so elements the op fails
+  // to write are detectable.
+  for (int i = 0; i < output1_dims_count; ++i) {
+    output1_data[i] = 23;
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    output2_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_SPLIT, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteSplitParams builtin_data = {
+      .num_splits = 2,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({2, 2, 3});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output1_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output1_data.begin()[i], output1_data[i]);
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output2_data.begin()[i], output2_data[i]);
+  }
+}
+
+void TestSplitTwoOutputsQuantized32(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<int32_t> input_data,
+    std::initializer_list<int> axis_dims_data,
+    std::initializer_list<int32_t> axis_data,
+    std::initializer_list<int> output1_dims_data,
+    std::initializer_list<int32_t> expected_output1_data,
+    std::initializer_list<int> output2_dims_data,
+    std::initializer_list<int32_t> expected_output2_data, int32_t* output1_data,
+    int32_t* output2_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* axis_dims = IntArrayFromInitializer(axis_dims_data);
+  TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data);
+  TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data);
+  const int output1_dims_count = ElementCount(*output1_dims);
+  const int output2_dims_count = ElementCount(*output2_dims);
+
+  constexpr int input_size = 1;
+  constexpr int output_size = 2;
+  constexpr int axis_size = 1;
+  constexpr int tensors_size = input_size + output_size + axis_size;
+  TfLiteTensor tensors[tensors_size] = {
+      // CreateQuantized32Tensor needs min/max values, but they don't affect
+      // SPLIT's behavior, so arbitrarily use 0 and 10.
+      CreateQuantized32Tensor(axis_data, axis_dims, "axis_tensor", 0, 10),
+      CreateQuantized32Tensor(input_data, input_dims, "input_tensor", 0, 10),
+      CreateQuantized32Tensor(output1_data, output1_dims, "output1_tensor", 0,
+                              10),
+      CreateQuantized32Tensor(output2_data, output2_dims, "output2_tensor", 0,
+                              10)};
+
+  // Only a constant axis tensor is supported, so mark it read-only.
+  tensors[0].allocation_type = kTfLiteMmapRo;
+
+  // Fill the output buffers with a sentinel value so elements the op fails
+  // to write are detectable.
+  for (int i = 0; i < output1_dims_count; ++i) {
+    output1_data[i] = 23;
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    output2_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_SPLIT, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteSplitParams builtin_data = {
+      .num_splits = 2,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({2, 0, 1});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({2, 2, 3});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output1_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output1_data.begin()[i], output1_data[i]);
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output2_data.begin()[i], output2_data[i]);
+  }
+}
+
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TwoSplitFourDimensionalAxisZero) {
+  constexpr int output1_dims_count = 8;
+  constexpr int output2_dims_count = 8;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  tflite::testing::TestSplitTwoOutputsFloat(
+      {4, 2, 2, 2, 2},                                          // Input shape
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},  // Input values
+      {1, 1},                                                   // Axis shape
+      {0},                                                      // Axis value
+      {4, 1, 2, 2, 2},                                          // Output1 shape
+      {1, 2, 3, 4, 5, 6, 7, 8},         // Output1 values
+      {4, 1, 2, 2, 2},                  // Output2 shape
+      {9, 10, 11, 12, 13, 14, 15, 16},  // Output2 values
+      output1_data, output2_data);
+}
+
+TF_LITE_MICRO_TEST(TwoSplitFourDimensionalAxisOne) {
+  constexpr int output1_dims_count = 8;
+  constexpr int output2_dims_count = 8;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  tflite::testing::TestSplitTwoOutputsFloat(
+      {4, 2, 2, 2, 2},                                          // Input shape
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},  // Input values
+      {1, 1},                                                   // Axis shape
+      {1},                                                      // Axis value
+      {4, 2, 1, 2, 2},                                          // Output1 shape
+      {1, 2, 3, 4, 9, 10, 11, 12},   // Output1 values
+      {4, 2, 1, 2, 2},               // Output2 shape
+      {5, 6, 7, 8, 13, 14, 15, 16},  // Output2 values
+      output1_data, output2_data);
+}
+
+TF_LITE_MICRO_TEST(TwoSplitFourDimensionalAxisTwo) {
+  constexpr int output1_dims_count = 8;
+  constexpr int output2_dims_count = 8;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  tflite::testing::TestSplitTwoOutputsFloat(
+      {4, 2, 2, 2, 2},                                          // Input shape
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},  // Input values
+      {1, 1},                                                   // Axis shape
+      {2},                                                      // Axis value
+      {4, 2, 2, 1, 2},                                          // Output1 shape
+      {1, 2, 5, 6, 9, 10, 13, 14},   // Output1 values
+      {4, 2, 2, 1, 2},               // Output2 shape
+      {3, 4, 7, 8, 11, 12, 15, 16},  // Output2 values
+      output1_data, output2_data);
+}
+
+TF_LITE_MICRO_TEST(TwoSplitFourDimensionalAxisThree) {
+  constexpr int output1_dims_count = 8;
+  constexpr int output2_dims_count = 8;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  tflite::testing::TestSplitTwoOutputsFloat(
+      {4, 2, 2, 2, 2},                                          // Input shape
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},  // Input values
+      {1, 1},                                                   // Axis shape
+      {3},                                                      // Axis value
+      {4, 2, 2, 2, 1},                                          // Output1 shape
+      {1, 3, 5, 7, 9, 11, 13, 15},   // Output1 values
+      {4, 2, 2, 2, 1},               // Output2 shape
+      {2, 4, 6, 8, 10, 12, 14, 16},  // Output2 values
+      output1_data, output2_data);
+}
+
+TF_LITE_MICRO_TEST(TwoSplitFourDimensionalNegativeAxis) {
+  constexpr int output1_dims_count = 8;
+  constexpr int output2_dims_count = 8;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  tflite::testing::TestSplitTwoOutputsFloat(
+      {4, 2, 2, 2, 2},                                          // Input shape
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},  // Input values
+      {1, 1},                                                   // Axis shape
+      {-4},                                                     // Axis value
+      {4, 1, 2, 2, 2},                                          // Output1 shape
+      {1, 2, 3, 4, 5, 6, 7, 8},         // Output1 values
+      {4, 1, 2, 2, 2},                  // Output2 shape
+      {9, 10, 11, 12, 13, 14, 15, 16},  // Output2 values
+      output1_data, output2_data);
+}
+
+TF_LITE_MICRO_TEST(FourSplit) {
+  constexpr int output1_dims_count = 1;
+  constexpr int output2_dims_count = 1;
+  constexpr int output3_dims_count = 1;
+  constexpr int output4_dims_count = 1;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  float output3_data[output3_dims_count];
+  float output4_data[output4_dims_count];
+  tflite::testing::TestSplitFourOutputsFloat({1, 4},        // Input shape
+                                             {1, 2, 3, 4},  // Input values
+                                             {1, 1},        // Axis shape
+                                             {0},           // Axis value
+                                             {1, 1},        // Output1 shape
+                                             {1},           // Output1 values
+                                             {1, 1},        // Output2 shape
+                                             {2},           // Output2 values
+                                             {1, 1},        // Output3 shape
+                                             {3},           // Output3 values
+                                             {1, 1},        // Output4 shape
+                                             {4},           // Output4 values
+                                             output1_data, output2_data,
+                                             output3_data, output4_data);
+}
+
+TF_LITE_MICRO_TEST(TwoSplitOneDimensional) {
+  constexpr int output1_dims_count = 1;
+  constexpr int output2_dims_count = 1;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  tflite::testing::TestSplitTwoOutputsFloat({1, 2},  // Input shape
+                                            {1, 2},  // Input values
+                                            {1, 1},  // Axis shape
+                                            {0},     // Axis value
+                                            {1, 1},  // Output1 shape
+                                            {1},     // Output1 values
+                                            {1, 1},  // Output2 shape
+                                            {2},     // Output2 values
+                                            output1_data, output2_data);
+}
+
+TF_LITE_MICRO_TEST(TwoSplitFourDimensionalQuantized) {
+  constexpr int output1_dims_count = 8;
+  constexpr int output2_dims_count = 8;
+  uint8_t output1_data[output1_dims_count];
+  uint8_t output2_data[output2_dims_count];
+  tflite::testing::TestSplitTwoOutputsQuantized(
+      {4, 2, 2, 2, 2},                                          // Input shape
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},  // Input values
+      {1, 1},                                                   // Axis shape
+      {0},                                                      // Axis value
+      {4, 1, 2, 2, 2},                                          // Output1 shape
+      {1, 2, 3, 4, 5, 6, 7, 8},         // Output1 values
+      {4, 1, 2, 2, 2},                  // Output2 shape
+      {9, 10, 11, 12, 13, 14, 15, 16},  // Output2 values
+      output1_data, output2_data);
+}
+
+TF_LITE_MICRO_TEST(TwoSplitFourDimensionalQuantized32) {
+  constexpr int output1_dims_count = 8;
+  constexpr int output2_dims_count = 8;
+  int32_t output1_data[output1_dims_count];
+  int32_t output2_data[output2_dims_count];
+  tflite::testing::TestSplitTwoOutputsQuantized32(
+      {4, 2, 2, 2, 2},                                          // Input shape
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},  // Input values
+      {1, 1},                                                   // Axis shape
+      {0},                                                      // Axis value
+      {4, 1, 2, 2, 2},                                          // Output1 shape
+      {1, 2, 3, 4, 5, 6, 7, 8},         // Output1 values
+      {4, 1, 2, 2, 2},                  // Output2 shape
+      {9, 10, 11, 12, 13, 14, 15, 16},  // Output2 values
+      output1_data, output2_data);
+}
+
+TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/experimental/micro/kernels/unpack.cc b/tensorflow/lite/experimental/micro/kernels/unpack.cc
new file mode 100644
index 0000000..c446844
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/kernels/unpack.cc
@@ -0,0 +1,116 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace unpack {
+namespace {
+
+constexpr int kInputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+template <typename T>
+TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node,
+                        const TfLiteTensor* input, int output_count, int axis) {
+  const TfLiteTensor* output0 = &context->tensors[node->outputs->data[0]];
+  const TfLiteIntArray* input_dims = input->dims;
+  const TfLiteIntArray* output_dims = output0->dims;
+  const int dimensions = input_dims->size;
+
+  if (axis < 0) {
+    axis += NumDimensions(input);
+  }
+
+  TFLITE_DCHECK_LT(axis, dimensions);
+
+  int outer_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    outer_size *= input_dims->data[i];
+  }
+  int copy_size = 1;
+  for (int i = axis + 1; i < dimensions; ++i) {
+    copy_size *= input_dims->data[i];
+  }
+  int output_size = 1;
+  for (int i = 0; i < output_dims->size; ++i) {
+    output_size *= output_dims->data[i];
+  }
+  TFLITE_DCHECK_EQ(output_size, copy_size * outer_size);
+
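+  // Unpack mirrors pack: for outer index k, output i receives the chunk at
+  // input offset (k * output_count + i) * copy_size.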
+  const T* input_data = GetTensorData<T>(input);
+
+  for (int i = 0; i < output_count; ++i) {
+    TfLiteTensor* t = &context->tensors[node->outputs->data[i]];
+    T* output_data = GetTensorData<T>(t);
+    for (int k = 0; k < outer_size; ++k) {
+      T* output_ptr = output_data + copy_size * k;
+      int loc = k * output_count * copy_size + i * copy_size;
+      const T* input_ptr = input_data + loc;
+      for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j];
+    }
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TfLiteUnpackParams* data =
+      reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
+
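+  // data->num is the number of output tensors; data->axis is the dimension
+  // to unpack along and may be negative (UnpackImpl normalizes it).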
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      return UnpackImpl<float>(context, node, input, data->num, data->axis);
+    }
+    case kTfLiteInt32: {
+      return UnpackImpl<int32_t>(context, node, input, data->num, data->axis);
+    }
+    case kTfLiteUInt8: {
+      return UnpackImpl<uint8_t>(context, node, input, data->num, data->axis);
+    }
+    case kTfLiteInt8: {
+      return UnpackImpl<int8_t>(context, node, input, data->num, data->axis);
+    }
+    default: {
+      context->ReportError(context, "Type '%s' is not supported by unpack.",
+                           TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+    }
+  }
+
+  return kTfLiteOk;
+}
+}  // namespace
+}  // namespace unpack
+
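+// The kernel keeps no per-instance state, so init and free are left as
+// nullptr in the registration.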
+TfLiteRegistration* Register_UNPACK() {
+  static TfLiteRegistration r = {nullptr, nullptr, unpack::Prepare,
+                                 unpack::Eval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/experimental/micro/kernels/unpack_test.cc b/tensorflow/lite/experimental/micro/kernels/unpack_test.cc
new file mode 100644
index 0000000..a05ddfa
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/kernels/unpack_test.cc
@@ -0,0 +1,479 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/experimental/micro/debug_log.h"
+#include "tensorflow/lite/experimental/micro/kernels/all_ops_resolver.h"
+#include "tensorflow/lite/experimental/micro/testing/micro_test.h"
+#include "tensorflow/lite/experimental/micro/testing/test_utils.h"
+
+namespace tflite {
+namespace testing {
+
+void TestUnpackThreeOutputsFloat(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<float> input_data, int axis,
+    std::initializer_list<int> output1_dims_data,
+    std::initializer_list<float> expected_output1_data,
+    std::initializer_list<int> output2_dims_data,
+    std::initializer_list<float> expected_output2_data,
+    std::initializer_list<int> output3_dims_data,
+    std::initializer_list<float> expected_output3_data, float* output1_data,
+    float* output2_data, float* output3_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data);
+  TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data);
+  TfLiteIntArray* output3_dims = IntArrayFromInitializer(output3_dims_data);
+  const int output1_dims_count = ElementCount(*output1_dims);
+  const int output2_dims_count = ElementCount(*output2_dims);
+  const int output3_dims_count = ElementCount(*output3_dims);
+
+  constexpr int input_size = 1;
+  constexpr int output_size = 3;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(output1_data, output1_dims, "output1_tensor"),
+      CreateFloatTensor(output2_data, output2_dims, "output2_tensor"),
+      CreateFloatTensor(output3_data, output3_dims, "output3_tensor")};
+
+  // Fill the output buffers with a sentinel value so that any elements the
+  // kernel fails to overwrite are detected.
+  for (int i = 0; i < output1_dims_count; ++i) {
+    output1_data[i] = 23;
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    output2_data[i] = 23;
+  }
+
+  for (int i = 0; i < output3_dims_count; ++i) {
+    output3_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_UNPACK, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteUnpackParams builtin_data = {
+      .num = 3,
+      .axis = axis,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
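+  // The leading element of each initializer list is the array length:
+  // {1, 0} is one input (tensor index 0) and {3, 1, 2, 3} is three outputs
+  // (tensor indices 1, 2 and 3).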
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({1, 0});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({3, 1, 2, 3});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
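+  // Build the TfLiteNode by hand: these tests drive the registration
+  // directly instead of going through an interpreter.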
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output1_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data.begin()[i], output1_data[i],
+                              1e-5f);
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data.begin()[i], output2_data[i],
+                              1e-5f);
+  }
+
+  for (int i = 0; i < output3_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output3_data.begin()[i], output3_data[i],
+                              1e-5f);
+  }
+}
+
+void TestUnpackOneOutputFloat(std::initializer_list<int> input_dims_data,
+                              std::initializer_list<float> input_data, int axis,
+                              std::initializer_list<int> output_dims_data,
+                              std::initializer_list<float> expected_output_data,
+                              float* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_dims_count = ElementCount(*output_dims);
+
+  constexpr int input_size = 1;
+  constexpr int output_size = 1;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateFloatTensor(input_data, input_dims, "input_tensor"),
+      CreateFloatTensor(output_data, output_dims, "output_tensor")};
+
+  // Fill the output buffer with a sentinel value so that any elements the
+  // kernel fails to overwrite are detected.
+  for (int i = 0; i < output_dims_count; ++i) {
+    output_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_UNPACK, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteUnpackParams builtin_data = {
+      .num = 1,
+      .axis = axis,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({1, 0});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({1, 1});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1e-5f);
+  }
+}
+
+void TestUnpackThreeOutputsQuantized(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<uint8_t> input_data, int axis,
+    std::initializer_list<int> output1_dims_data,
+    std::initializer_list<uint8_t> expected_output1_data,
+    std::initializer_list<int> output2_dims_data,
+    std::initializer_list<uint8_t> expected_output2_data,
+    std::initializer_list<int> output3_dims_data,
+    std::initializer_list<uint8_t> expected_output3_data, uint8_t* output1_data,
+    uint8_t* output2_data, uint8_t* output3_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data);
+  TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data);
+  TfLiteIntArray* output3_dims = IntArrayFromInitializer(output3_dims_data);
+  const int output1_dims_count = ElementCount(*output1_dims);
+  const int output2_dims_count = ElementCount(*output2_dims);
+  const int output3_dims_count = ElementCount(*output3_dims);
+
+  constexpr int input_size = 1;
+  constexpr int output_size = 3;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      // CreateQuantizedTensor needs min/max values to derive the quantization
+      // parameters, but they do not affect the behavior of UNPACK, so the
+      // arbitrary values 0 and 10 are used.
+      CreateQuantizedTensor(input_data, input_dims, "input_tensor", 0, 10),
+      CreateQuantizedTensor(output1_data, output1_dims, "output1_tensor", 0,
+                            10),
+      CreateQuantizedTensor(output2_data, output2_dims, "output2_tensor", 0,
+                            10),
+      CreateQuantizedTensor(output3_data, output3_dims, "output3_tensor", 0,
+                            10)};
+
+  // Fill the output buffers with a sentinel value so that any elements the
+  // kernel fails to overwrite are detected.
+  for (int i = 0; i < output1_dims_count; ++i) {
+    output1_data[i] = 23;
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    output2_data[i] = 23;
+  }
+
+  for (int i = 0; i < output3_dims_count; ++i) {
+    output3_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_UNPACK, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteUnpackParams builtin_data = {
+      .num = 3,
+      .axis = axis,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({1, 0});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({3, 1, 2, 3});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output1_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output1_data.begin()[i], output1_data[i]);
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output2_data.begin()[i], output2_data[i]);
+  }
+
+  for (int i = 0; i < output3_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output3_data.begin()[i], output3_data[i]);
+  }
+}
+
+void TestUnpackThreeOutputsQuantized32(
+    std::initializer_list<int> input_dims_data,
+    std::initializer_list<int32_t> input_data, int axis,
+    std::initializer_list<int> output1_dims_data,
+    std::initializer_list<int32_t> expected_output1_data,
+    std::initializer_list<int> output2_dims_data,
+    std::initializer_list<int32_t> expected_output2_data,
+    std::initializer_list<int> output3_dims_data,
+    std::initializer_list<int32_t> expected_output3_data, int32_t* output1_data,
+    int32_t* output2_data, int32_t* output3_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data);
+  TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data);
+  TfLiteIntArray* output3_dims = IntArrayFromInitializer(output3_dims_data);
+  const int output1_dims_count = ElementCount(*output1_dims);
+  const int output2_dims_count = ElementCount(*output2_dims);
+  const int output3_dims_count = ElementCount(*output3_dims);
+
+  constexpr int input_size = 1;
+  constexpr int output_size = 3;
+  constexpr int tensors_size = input_size + output_size;
+  TfLiteTensor tensors[tensors_size] = {
+      // CreateQuantized32Tensor needs min/max values to derive the
+      // quantization parameters, but they do not affect the behavior of
+      // UNPACK, so the arbitrary values 0 and 10 are used.
+      CreateQuantized32Tensor(input_data, input_dims, "input_tensor", 0, 10),
+      CreateQuantized32Tensor(output1_data, output1_dims, "output1_tensor", 0,
+                              10),
+      CreateQuantized32Tensor(output2_data, output2_dims, "output2_tensor", 0,
+                              10),
+      CreateQuantized32Tensor(output3_data, output3_dims, "output3_tensor", 0,
+                              10)};
+
+  // Fill the output buffers with a sentinel value so that any elements the
+  // kernel fails to overwrite are detected.
+  for (int i = 0; i < output1_dims_count; ++i) {
+    output1_data[i] = 23;
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    output2_data[i] = 23;
+  }
+
+  for (int i = 0; i < output3_dims_count; ++i) {
+    output3_data[i] = 23;
+  }
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, &context);
+  tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_UNPACK, /* version= */ 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  TfLiteUnpackParams builtin_data = {
+      .num = 3,
+      .axis = axis,
+  };
+
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, nullptr, 0);
+  }
+  TfLiteIntArray* inputs_array = IntArrayFromInitializer({1, 0});
+  TfLiteIntArray* outputs_array = IntArrayFromInitializer({3, 1, 2, 3});
+  TfLiteIntArray* temporaries_array = IntArrayFromInitializer({0});
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = temporaries_array;
+  node.user_data = user_data;
+  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+
+  for (int i = 0; i < output1_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output1_data.begin()[i], output1_data[i]);
+  }
+
+  for (int i = 0; i < output2_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output2_data.begin()[i], output2_data[i]);
+  }
+
+  for (int i = 0; i < output3_dims_count; ++i) {
+    TF_LITE_MICRO_EXPECT_EQ(expected_output3_data.begin()[i], output3_data[i]);
+  }
+}
+
+}  // namespace testing
+}  // namespace tflite
+
+TF_LITE_MICRO_TESTS_BEGIN
+
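+// In the shape initializer lists below, the leading element is the number of
+// dimensions: {2, 3, 2} describes a rank-2 tensor of shape [3, 2].
+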
+TF_LITE_MICRO_TEST(UnpackFloatThreeOutputs) {
+  constexpr int output1_dims_count = 2;
+  constexpr int output2_dims_count = 2;
+  constexpr int output3_dims_count = 2;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  float output3_data[output3_dims_count];
+  tflite::testing::TestUnpackThreeOutputsFloat(
+      {2, 3, 2},           // Input shape
+      {1, 2, 3, 4, 5, 6},  // Input values
+      0,                   // Axis
+      {1, 2},              // Output1 shape
+      {1, 2},              // Output1 values
+      {1, 2},              // Output2 shape
+      {3, 4},              // Output2 values
+      {1, 2},              // Output3 shape
+      {5, 6},              // Output3 values
+      output1_data, output2_data, output3_data);
+}
+
+TF_LITE_MICRO_TEST(UnpackFloatThreeOutputsNegativeAxisTwo) {
+  constexpr int output1_dims_count = 2;
+  constexpr int output2_dims_count = 2;
+  constexpr int output3_dims_count = 2;
+  float output1_data[output1_dims_count];
+  float output2_data[output2_dims_count];
+  float output3_data[output3_dims_count];
+  tflite::testing::TestUnpackThreeOutputsFloat(
+      {2, 3, 2},           // Input shape
+      {1, 2, 3, 4, 5, 6},  // Input values
+      -2,                  // Axis
+      {1, 2},              // Output1 shape
+      {1, 2},              // Output1 values
+      {1, 2},              // Output2 shape
+      {3, 4},              // Output2 values
+      {1, 2},              // Output3 shape
+      {5, 6},              // Output3 values
+      output1_data, output2_data, output3_data);
+}
+
+TF_LITE_MICRO_TEST(UnpackFloatOneOutput) {
+  constexpr int output_dims_count = 6;
+  float output_data[output_dims_count];
+  tflite::testing::TestUnpackOneOutputFloat(
+      {2, 1, 6},           // Input shape
+      {1, 2, 3, 4, 5, 6},  // Input values
+      0,                   // Axis
+      {1, 6},              // Output shape
+      {1, 2, 3, 4, 5, 6},  // Output values
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(UnpackQuantizedThreeOutputs) {
+  constexpr int output1_dims_count = 2;
+  constexpr int output2_dims_count = 2;
+  constexpr int output3_dims_count = 2;
+  uint8_t output1_data[output1_dims_count];
+  uint8_t output2_data[output2_dims_count];
+  uint8_t output3_data[output3_dims_count];
+  tflite::testing::TestUnpackThreeOutputsQuantized(
+      {2, 3, 2},           // Input shape
+      {1, 2, 3, 4, 5, 6},  // Input values
+      0,                   // Axis
+      {1, 2},              // Output1 shape
+      {1, 2},              // Output1 values
+      {1, 2},              // Output2 shape
+      {3, 4},              // Output2 values
+      {1, 2},              // Output3 shape
+      {5, 6},              // Output3 values
+      output1_data, output2_data, output3_data);
+}
+
+TF_LITE_MICRO_TEST(UnpackQuantized32ThreeOutputs) {
+  constexpr int output1_dims_count = 2;
+  constexpr int output2_dims_count = 2;
+  constexpr int output3_dims_count = 2;
+  int32_t output1_data[output1_dims_count];
+  int32_t output2_data[output2_dims_count];
+  int32_t output3_data[output3_dims_count];
+  tflite::testing::TestUnpackThreeOutputsQuantized32(
+      {2, 3, 2},           // Input shape
+      {1, 2, 3, 4, 5, 6},  // Input values
+      0,                   // Axis
+      {1, 2},              // Output1 shape
+      {1, 2},              // Output1 values
+      {1, 2},              // Output2 shape
+      {3, 4},              // Output2 values
+      {1, 2},              // Output3 shape
+      {5, 6},              // Output3 values
+      output1_data, output2_data, output3_data);
+}
+
+TF_LITE_MICRO_TESTS_END