tensorflow/python/feature_column/dense_features_v2_test.py - platform/external/tensorflow - Git at Google

 # Copyright 2019 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Tests for dense_features_v2."""

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import numpy as np

 from tensorflow.python.client import session
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.feature_column import dense_features_v2 as df
 from tensorflow.python.feature_column import feature_column_v2 as fc
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test


 def _initialized_session(config=None):
   """Returns a new `Session` with variables and lookup tables initialized.

   Args:
     config: Optional session configuration, forwarded to `session.Session`.

   Returns:
     The open session. The caller owns it and should close it, e.g. by using
     it as a context manager.

   Raises:
     Exception: Whatever running the initializer ops raises; the session is
       closed before the error propagates.
   """
   sess = session.Session(config=config)
   try:
     sess.run(variables_lib.global_variables_initializer())
     sess.run(lookup_ops.tables_initializer())
   except Exception:
     # Do not leak the session when initialization fails.
     sess.close()
     raise
   return sess


 class DenseFeaturesTest(test.TestCase):
   """Tests for the `DenseFeatures` layer in `dense_features_v2`."""

   @test_util.run_in_graph_and_eager_modes()
   def test_retrieving_input(self):
     """A single numeric column yields its feature value."""
     features = {'a': [0.]}
     dense_features = df.DenseFeatures(fc.numeric_column('a'))
     inputs = self.evaluate(dense_features(features))
     self.assertAllClose([[0.]], inputs)

   def test_reuses_variables(self):
     """Calling the layer twice in eager mode reuses its single variable."""
     with context.eager_mode():
       sparse_input = sparse_tensor.SparseTensor(
           indices=((0, 0), (1, 0), (2, 0)),
           values=(0, 1, 2),
           dense_shape=(3, 3))

       # Create feature columns (categorical and embedding).
       categorical_column = fc.categorical_column_with_identity(
           key='a', num_buckets=3)
       embedding_dimension = 2

       def _embedding_column_initializer(shape, dtype, partition_info=None):
         del shape  # unused
         del dtype  # unused
         del partition_info  # unused
         embedding_values = (
             (1, 0),  # id 0
             (0, 1),  # id 1
             (1, 1))  # id 2
         return embedding_values

       embedding_column = fc.embedding_column(
           categorical_column,
           dimension=embedding_dimension,
           initializer=_embedding_column_initializer)

       dense_features = df.DenseFeatures([embedding_column])
       features = {'a': sparse_input}

       inputs = dense_features(features)
       variables = dense_features.variables

       # Sanity check: test that the inputs are correct.
       self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)

       # Check that only one variable was created.
       self.assertEqual(1, len(variables))

       # Check that invoking dense_features on the same features does not create
       # additional variables. The variable list is read again after the second
       # call so the count reflects the layer's current state instead of the
       # list captured above.
       _ = dense_features(features)
       variables_after = dense_features.variables
       self.assertEqual(1, len(variables_after))
       # Identity (not equality) is what "reuses the variable" means.
       self.assertIs(variables[0], variables_after[0])

   def test_feature_column_dense_features_gradient(self):
     """Gradients through the layer are returned per embedding row."""
     with context.eager_mode():
       sparse_input = sparse_tensor.SparseTensor(
           indices=((0, 0), (1, 0), (2, 0)),
           values=(0, 1, 2),
           dense_shape=(3, 3))

       # Create feature columns (categorical and embedding).
       categorical_column = fc.categorical_column_with_identity(
           key='a', num_buckets=3)
       embedding_dimension = 2

       def _embedding_column_initializer(shape, dtype, partition_info=None):
         del shape  # unused
         del dtype  # unused
         del partition_info  # unused
         embedding_values = (
             (1, 0),  # id 0
             (0, 1),  # id 1
             (1, 1))  # id 2
         return embedding_values

       embedding_column = fc.embedding_column(
           categorical_column,
           dimension=embedding_dimension,
           initializer=_embedding_column_initializer)

       dense_features = df.DenseFeatures([embedding_column])
       features = {'a': sparse_input}

       def scale_matrix():
         matrix = dense_features(features)
         return 2 * matrix

       # Sanity check: Verify that scale_matrix returns the correct output.
       self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix())

       # Check that the returned gradient is correct.
       grad_function = backprop.implicit_grad(scale_matrix)
       grads_and_vars = grad_function()
       indexed_slice = grads_and_vars[0][0]
       gradient = indexed_slice.values

       self.assertAllEqual([0, 1, 2], indexed_slice.indices)
       self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)

   def test_raises_if_empty_feature_columns(self):
     """An empty `feature_columns` list raises ValueError."""
     with self.assertRaisesRegexp(ValueError,
                                  'feature_columns must not be empty'):
       df.DenseFeatures(feature_columns=[])(features={})

   def test_should_be_dense_column(self):
     """A categorical (non-dense) column raises ValueError."""
     with self.assertRaisesRegexp(ValueError, 'must be a .*DenseColumn'):
       df.DenseFeatures(feature_columns=[
           fc.categorical_column_with_hash_bucket('wire_cast', 4)
       ])(
           features={
               'a': [[0]]
           })

   def test_does_not_support_dict_columns(self):
     """Passing `feature_columns` as a dict raises ValueError."""
     with self.assertRaisesRegexp(
         ValueError, 'Expected feature_columns to be iterable, found dict.'):
       df.DenseFeatures(feature_columns={'a': fc.numeric_column('a')})(
           features={
               'a': [[0]]
           })

   def test_bare_column(self):
     """A single column that is not wrapped in a list is accepted."""
     with ops.Graph().as_default():
       features = {'a': [0.]}
       net = df.DenseFeatures(fc.numeric_column('a'))(features)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[0.]], self.evaluate(net))

   def test_column_generator(self):
     """A generator of columns is accepted as `feature_columns`."""
     with ops.Graph().as_default():
       features = {'a': [0.], 'b': [1.]}
       columns = (fc.numeric_column(key) for key in features)
       net = df.DenseFeatures(columns)(features)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[0., 1.]], self.evaluate(net))

   def test_raises_if_duplicate_name(self):
     """Two columns with the same name raise ValueError."""
     with self.assertRaisesRegexp(
         ValueError, 'Duplicate feature column name found for columns'):
       df.DenseFeatures(
           feature_columns=[fc.numeric_column('a'),
                            fc.numeric_column('a')])(
                                features={
                                    'a': [[0]]
                                })

   def test_one_column(self):
     """One scalar numeric column is returned unchanged."""
     price = fc.numeric_column('price')
     with ops.Graph().as_default():
       features = {'price': [[1.], [5.]]}
       net = df.DenseFeatures([price])(features)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[1.], [5.]], self.evaluate(net))

   def test_multi_dimension(self):
     """A numeric column with shape=2 keeps both values per row."""
     price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
       features = {'price': [[1., 2.], [5., 6.]]}
       net = df.DenseFeatures([price])(features)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[1., 2.], [5., 6.]], self.evaluate(net))

   def test_compute_output_shape(self):
     """`compute_output_shape` reports the summed widths of the columns."""
     price1 = fc.numeric_column('price1', shape=2)
     price2 = fc.numeric_column('price2', shape=4)
     with ops.Graph().as_default():
       features = {
           'price1': [[1., 2.], [5., 6.]],
           'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
       }
       dense_features = df.DenseFeatures([price1, price2])
       self.assertEqual((None, 6), dense_features.compute_output_shape((None,)))
       net = dense_features(features)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]],
                           self.evaluate(net))

   def test_raises_if_shape_mismatch(self):
     """Input that cannot be reshaped to the column shape raises."""
     price = fc.numeric_column('price', shape=2)
     with ops.Graph().as_default():
       features = {'price': [[1.], [5.]]}
       with self.assertRaisesRegexp(
           Exception,
           r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'):
         df.DenseFeatures([price])(features)

   def test_reshaping(self):
     """A column of shape [1, 2] is flattened to two values per row."""
     price = fc.numeric_column('price', shape=[1, 2])
     with ops.Graph().as_default():
       features = {'price': [[[1., 2.]], [[5., 6.]]]}
       net = df.DenseFeatures([price])(features)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[1., 2.], [5., 6.]], self.evaluate(net))

   def test_multi_column(self):
     """Outputs of several columns are concatenated per row."""
     price1 = fc.numeric_column('price1', shape=2)
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
       features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
       net = df.DenseFeatures([price1, price2])(features)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net))

   def test_cols_to_output_tensors(self):
     """The optional dict argument is filled with each column's output."""
     price1 = fc.numeric_column('price1', shape=2)
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
       cols_dict = {}
       features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]}
       dense_features = df.DenseFeatures([price1, price2])
       net = dense_features(features, cols_dict)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[1., 2.], [5., 6.]],
                           self.evaluate(cols_dict[price1]))
       self.assertAllClose([[3.], [4.]], self.evaluate(cols_dict[price2]))
       self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], self.evaluate(net))

   def test_column_order(self):
     """Output order does not depend on the order the columns are passed in."""
     price_a = fc.numeric_column('price_a')
     price_b = fc.numeric_column('price_b')
     with ops.Graph().as_default():
       features = {
           'price_a': [[1.]],
           'price_b': [[3.]],
       }
       net1 = df.DenseFeatures([price_a, price_b])(features)
       net2 = df.DenseFeatures([price_b, price_a])(features)

       self.evaluate(variables_lib.global_variables_initializer())
       self.evaluate(lookup_ops.tables_initializer())

       self.assertAllClose([[1., 3.]], self.evaluate(net1))
       self.assertAllClose([[1., 3.]], self.evaluate(net2))

   def test_fails_for_categorical_column(self):
     """A categorical column fed a SparseTensor is rejected."""
     animal = fc.categorical_column_with_identity('animal', num_buckets=4)
     with ops.Graph().as_default():
       features = {
           'animal':
               sparse_tensor.SparseTensor(
                   indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
       }
       with self.assertRaisesRegexp(Exception, 'must be a .*DenseColumn'):
         df.DenseFeatures([animal])(features)

   def test_static_batch_size_mismatch(self):
     """Statically different batch sizes raise ValueError."""
     price1 = fc.numeric_column('price1')
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
       features = {
           'price1': [[1.], [5.], [7.]],  # batchsize = 3
           'price2': [[3.], [4.]]  # batchsize = 2
       }
       with self.assertRaisesRegexp(
           ValueError,
           r'Batch size \(first dimension\) of each feature must be same.'):
         df.DenseFeatures([price1, price2])(features)

   def test_subset_of_static_batch_size_mismatch(self):
     """A static mismatch is caught even when one feature is a placeholder."""
     price1 = fc.numeric_column('price1')
     price2 = fc.numeric_column('price2')
     price3 = fc.numeric_column('price3')
     with ops.Graph().as_default():
       features = {
           # The placeholder is never fed here; only price2 and price3 carry
           # literal batch sizes, and those differ.
           'price1': array_ops.placeholder(dtype=dtypes.int64),
           'price2': [[3.], [4.]],  # batchsize = 2
           'price3': [[3.], [4.], [5.]]  # batchsize = 3
       }
       with self.assertRaisesRegexp(
           ValueError,
           r'Batch size \(first dimension\) of each feature must be same.'):
         df.DenseFeatures([price1, price2, price3])(features)

   def test_runtime_batch_size_mismatch(self):
     """A batch-size mismatch only visible at run time raises an OpError."""
     price1 = fc.numeric_column('price1')
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
       features = {
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 3
           'price2': [[3.], [4.]]  # batchsize = 2
       }
       net = df.DenseFeatures([price1, price2])(features)
       with _initialized_session() as sess:
         with self.assertRaisesRegexp(errors.OpError,
                                      'Dimensions of inputs should match'):
           sess.run(net, feed_dict={features['price1']: [[1.], [5.], [7.]]})

   def test_runtime_batch_size_matches(self):
     """Matching batch sizes fed at run time evaluate without error."""
     price1 = fc.numeric_column('price1')
     price2 = fc.numeric_column('price2')
     with ops.Graph().as_default():
       features = {
           'price1': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
           'price2': array_ops.placeholder(dtype=dtypes.int64),  # batchsize = 2
       }
       net = df.DenseFeatures([price1, price2])(features)
       with _initialized_session() as sess:
         sess.run(
             net,
             feed_dict={
                 features['price1']: [[1.], [5.]],
                 features['price2']: [[1.], [5.]],
             })

   def test_multiple_layers_with_same_embedding_column(self):
     """Each layer creates its own embedding variable for the same column."""
     some_sparse_column = fc.categorical_column_with_hash_bucket(
         'sparse_feature', hash_bucket_size=5)
     some_embedding_column = fc.embedding_column(
         some_sparse_column, dimension=10)

     with ops.Graph().as_default():
       features = {
           'sparse_feature': [['a'], ['x']],
       }
       all_cols = [some_embedding_column]
       df.DenseFeatures(all_cols)(features)
       df.DenseFeatures(all_cols)(features)
       # Make sure that 2 variables get created in this case.
       self.assertEqual(2,
                        len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
       expected_var_names = [
           'dense_features/sparse_feature_embedding/embedding_weights:0',
           'dense_features_1/sparse_feature_embedding/embedding_weights:0'
       ]
       self.assertItemsEqual(
           expected_var_names,
           [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])

   @test_util.run_deprecated_v1
   def test_multiple_layers_with_same_shared_embedding_column(self):
     """Layers using the same shared embedding columns create one variable."""
     categorical_column_a = fc.categorical_column_with_identity(
         key='aaa', num_buckets=3)
     categorical_column_b = fc.categorical_column_with_identity(
         key='bbb', num_buckets=3)
     embedding_dimension = 2
     embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
         [categorical_column_b, categorical_column_a],
         dimension=embedding_dimension)

     with ops.Graph().as_default():
       features = {
           'aaa':
               sparse_tensor.SparseTensor(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(0, 1, 0),
                   dense_shape=(2, 2)),
           'bbb':
               sparse_tensor.SparseTensor(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(1, 2, 1),
                   dense_shape=(2, 2)),
       }
       all_cols = [embedding_column_a, embedding_column_b]
       df.DenseFeatures(all_cols)(features)
       df.DenseFeatures(all_cols)(features)
       # Make sure that only 1 variable gets created in this case.
       self.assertEqual(1,
                        len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
       self.assertItemsEqual(
           ['aaa_bbb_shared_embedding:0'],
           [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])

   @test_util.run_deprecated_v1
   def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self):
     """Shared embedding columns create one variable in each separate graph."""
     categorical_column_a = fc.categorical_column_with_identity(
         key='aaa', num_buckets=3)
     categorical_column_b = fc.categorical_column_with_identity(
         key='bbb', num_buckets=3)
     embedding_dimension = 2
     embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2(
         [categorical_column_b, categorical_column_a],
         dimension=embedding_dimension)
     all_cols = [embedding_column_a, embedding_column_b]

     with ops.Graph().as_default():
       features = {
           'aaa':
               sparse_tensor.SparseTensor(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(0, 1, 0),
                   dense_shape=(2, 2)),
           'bbb':
               sparse_tensor.SparseTensor(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(1, 2, 1),
                   dense_shape=(2, 2)),
       }
       df.DenseFeatures(all_cols)(features)
       # Make sure that only 1 variable gets created in this case.
       self.assertEqual(1,
                        len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))

     with ops.Graph().as_default():
       features1 = {
           'aaa':
               sparse_tensor.SparseTensor(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(0, 1, 0),
                   dense_shape=(2, 2)),
           'bbb':
               sparse_tensor.SparseTensor(
                   indices=((0, 0), (1, 0), (1, 1)),
                   values=(1, 2, 1),
                   dense_shape=(2, 2)),
       }

       df.DenseFeatures(all_cols)(features1)
       # Make sure that only 1 variable gets created in this case.
       self.assertEqual(1,
                        len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)))
       self.assertItemsEqual(
           ['aaa_bbb_shared_embedding:0'],
           [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)])

   @test_util.run_deprecated_v1
   def test_with_1d_sparse_tensor(self):
     """Rank-1 dense and sparse features are accepted."""
     embedding_values = (
         (1., 2., 3., 4., 5.),  # id 0
         (6., 7., 8., 9., 10.),  # id 1
         (11., 12., 13., 14., 15.)  # id 2
     )

     def _initializer(shape, dtype, partition_info=None):
       del shape, dtype, partition_info
       return embedding_values

     # price has 1 dimension in dense_features
     price = fc.numeric_column('price')

     # one_hot_body_style has 3 dims in dense_features.
     body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
     one_hot_body_style = fc.indicator_column(body_style)

     # embedded_country has 5 dims in dense_features.
     country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
     embedded_country = fc.embedding_column(
         country, dimension=5, initializer=_initializer)

     # Provides 1-dim tensor and dense tensor.
     features = {
         'price':
             constant_op.constant([
                 11.,
                 12.,
             ]),
         'body-style':
             sparse_tensor.SparseTensor(
                 indices=((0,), (1,)),
                 values=('sedan', 'hardtop'),
                 dense_shape=(2,)),
         # This is dense tensor for the categorical_column.
         'country':
             constant_op.constant(['CA', 'US']),
     }
     self.assertEqual(1, features['price'].shape.ndims)
     self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
     self.assertEqual(1, features['country'].shape.ndims)

     net = df.DenseFeatures([price, one_hot_body_style, embedded_country])(
         features)
     self.assertEqual(1 + 3 + 5, net.shape[1])
     with _initialized_session() as sess:

       # Each expected row is `one_hot_body_style` (3 values), then
       # `embedded_country` (5 values), then `price` (1 value).
       self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.],
                            [1., 0., 0., 1., 2., 3., 4., 5., 12.]],
                           sess.run(net))

   @test_util.run_deprecated_v1
   def test_with_1d_unknown_shape_sparse_tensor(self):
     """Placeholder features without a static shape are accepted."""
     embedding_values = (
         (1., 2.),  # id 0
         (6., 7.),  # id 1
         (11., 12.)  # id 2
     )

     def _initializer(shape, dtype, partition_info=None):
       del shape, dtype, partition_info
       return embedding_values

     # price has 1 dimension in dense_features
     price = fc.numeric_column('price')

     # one_hot_body_style has 3 dims in dense_features.
     body_style = fc.categorical_column_with_vocabulary_list(
         'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
     one_hot_body_style = fc.indicator_column(body_style)

     # embedded_country has 2 dims in dense_features.
     country = fc.categorical_column_with_vocabulary_list(
         'country', vocabulary_list=['US', 'JP', 'CA'])
     embedded_country = fc.embedding_column(
         country, dimension=2, initializer=_initializer)

     # Provides 1-dim tensor and dense tensor.
     features = {
         'price': array_ops.placeholder(dtypes.float32),
         'body-style': array_ops.sparse_placeholder(dtypes.string),
         # This is dense tensor for the categorical_column.
         'country': array_ops.placeholder(dtypes.string),
     }
     self.assertIsNone(features['price'].shape.ndims)
     self.assertIsNone(features['body-style'].get_shape().ndims)
     self.assertIsNone(features['country'].shape.ndims)

     price_data = np.array([11., 12.])
     body_style_data = sparse_tensor.SparseTensorValue(
         indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,))
     country_data = np.array([['US'], ['CA']])

     net = df.DenseFeatures([price, one_hot_body_style, embedded_country])(
         features)
     self.assertEqual(1 + 3 + 2, net.shape[1])
     with _initialized_session() as sess:

       # Each expected row is `one_hot_body_style` (3 values), then
       # `embedded_country` (2 values), then `price` (1 value).
       self.assertAllEqual(
           [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]],
           sess.run(
               net,
               feed_dict={
                   features['price']: price_data,
                   features['body-style']: body_style_data,
                   features['country']: country_data
               }))

   @test_util.run_deprecated_v1
   def test_with_rank_0_feature(self):
     """Rank-0 features are rejected, both statically and at run time."""
     # price has 1 dimension in dense_features
     price = fc.numeric_column('price')
     features = {
         'price': constant_op.constant(0),
     }
     self.assertEqual(0, features['price'].shape.ndims)

     # Static rank 0 should fail
     with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
       df.DenseFeatures([price])(features)

     # Dynamic rank 0 should fail
     features = {
         'price': array_ops.placeholder(dtypes.float32),
     }
     net = df.DenseFeatures([price])(features)
     self.assertEqual(1, net.shape[1])
     with _initialized_session() as sess:
       with self.assertRaisesOpError('Feature .* cannot have rank 0'):
         sess.run(net, feed_dict={features['price']: np.array(1)})


 # Run the test suite when this file is executed as a script.
 if __name__ == '__main__':
   test.main()
	# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================
	"""Tests for dense_features_v2."""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import numpy as np

	from tensorflow.python.client import session
	from tensorflow.python.eager import backprop
	from tensorflow.python.eager import context
	from tensorflow.python.feature_column import dense_features_v2 as df
	from tensorflow.python.feature_column import feature_column_v2 as fc
	from tensorflow.python.framework import constant_op
	from tensorflow.python.framework import dtypes
	from tensorflow.python.framework import errors
	from tensorflow.python.framework import ops
	from tensorflow.python.framework import sparse_tensor
	from tensorflow.python.framework import test_util
	from tensorflow.python.ops import array_ops
	from tensorflow.python.ops import lookup_ops
	from tensorflow.python.ops import variables as variables_lib
	from tensorflow.python.platform import test


	def _initialized_session(config=None):
	  """Returns a new `Session` with variables and lookup tables initialized.

	  Args:
	    config: Optional session configuration, forwarded to `session.Session`.

	  Returns:
	    The open session. The caller owns it and should close it.

	  Raises:
	    Exception: Whatever running the initializer ops raises; the session is
	      closed before the error propagates.
	  """
	  sess = session.Session(config=config)
	  try:
	    sess.run(variables_lib.global_variables_initializer())
	    sess.run(lookup_ops.tables_initializer())
	  except Exception:
	    # Do not leak the session when initialization fails.
	    sess.close()
	    raise
	  return sess


	class DenseFeaturesTest(test.TestCase):

	@test_util.run_in_graph_and_eager_modes()
	def test_retrieving_input(self):
	  """A single numeric column yields its feature value."""
	  features = {'a': [0.]}
	  dense_features = df.DenseFeatures(fc.numeric_column('a'))
	  inputs = self.evaluate(dense_features(features))
	  self.assertAllClose([[0.]], inputs)

	def test_reuses_variables(self):
	  """Calling the layer twice in eager mode reuses its single variable."""
	  with context.eager_mode():
	    sparse_input = sparse_tensor.SparseTensor(
	        indices=((0, 0), (1, 0), (2, 0)),
	        values=(0, 1, 2),
	        dense_shape=(3, 3))

	    # Create feature columns (categorical and embedding).
	    categorical_column = fc.categorical_column_with_identity(
	        key='a', num_buckets=3)
	    embedding_dimension = 2

	    def _embedding_column_initializer(shape, dtype, partition_info=None):
	      del shape  # unused
	      del dtype  # unused
	      del partition_info  # unused
	      embedding_values = (
	          (1, 0),  # id 0
	          (0, 1),  # id 1
	          (1, 1))  # id 2
	      return embedding_values

	    embedding_column = fc.embedding_column(
	        categorical_column,
	        dimension=embedding_dimension,
	        initializer=_embedding_column_initializer)

	    dense_features = df.DenseFeatures([embedding_column])
	    features = {'a': sparse_input}

	    inputs = dense_features(features)
	    variables = dense_features.variables

	    # Sanity check: test that the inputs are correct.
	    self.assertAllEqual([[1, 0], [0, 1], [1, 1]], inputs)

	    # Check that only one variable was created.
	    self.assertEqual(1, len(variables))

	    # Check that invoking dense_features on the same features does not create
	    # additional variables. The variable list is read again after the second
	    # call so the count reflects the layer's current state instead of the
	    # list captured above.
	    _ = dense_features(features)
	    variables_after = dense_features.variables
	    self.assertEqual(1, len(variables_after))
	    # Identity (not equality) is what "reuses the variable" means.
	    self.assertIs(variables[0], variables_after[0])

	def test_feature_column_dense_features_gradient(self):
	  """Gradients through the layer are returned per embedding row."""
	  with context.eager_mode():
	    sparse_input = sparse_tensor.SparseTensor(
	        indices=((0, 0), (1, 0), (2, 0)),
	        values=(0, 1, 2),
	        dense_shape=(3, 3))

	    # Create feature columns (categorical and embedding).
	    categorical_column = fc.categorical_column_with_identity(
	        key='a', num_buckets=3)
	    embedding_dimension = 2

	    def _embedding_column_initializer(shape, dtype, partition_info=None):
	      del shape  # unused
	      del dtype  # unused
	      del partition_info  # unused
	      embedding_values = (
	          (1, 0),  # id 0
	          (0, 1),  # id 1
	          (1, 1))  # id 2
	      return embedding_values

	    embedding_column = fc.embedding_column(
	        categorical_column,
	        dimension=embedding_dimension,
	        initializer=_embedding_column_initializer)

	    dense_features = df.DenseFeatures([embedding_column])
	    features = {'a': sparse_input}

	    def scale_matrix():
	      matrix = dense_features(features)
	      return 2 * matrix

	    # Sanity check: Verify that scale_matrix returns the correct output.
	    self.assertAllEqual([[2, 0], [0, 2], [2, 2]], scale_matrix())

	    # Check that the returned gradient is correct.
	    grad_function = backprop.implicit_grad(scale_matrix)
	    grads_and_vars = grad_function()
	    indexed_slice = grads_and_vars[0][0]
	    gradient = indexed_slice.values

	    self.assertAllEqual([0, 1, 2], indexed_slice.indices)
	    self.assertAllEqual([[2, 2], [2, 2], [2, 2]], gradient)

	def test_raises_if_empty_feature_columns(self):
	  """An empty `feature_columns` list raises ValueError."""
	  with self.assertRaisesRegexp(ValueError,
	                               'feature_columns must not be empty'):
	    df.DenseFeatures(feature_columns=[])(features={})

	def test_should_be_dense_column(self):
	  """A categorical (non-dense) column raises ValueError."""
	  with self.assertRaisesRegexp(ValueError, 'must be a .*DenseColumn'):
	    df.DenseFeatures(feature_columns=[
	        fc.categorical_column_with_hash_bucket('wire_cast', 4)
	    ])(
	        features={
	            'a': [[0]]
	        })

	def test_does_not_support_dict_columns(self):
	  """Passing `feature_columns` as a dict raises ValueError."""
	  with self.assertRaisesRegexp(
	      ValueError, 'Expected feature_columns to be iterable, found dict.'):
	    df.DenseFeatures(feature_columns={'a': fc.numeric_column('a')})(
	        features={
	            'a': [[0]]
	        })

	def test_bare_column(self):
	  """A single column that is not wrapped in a list is accepted."""
	  with ops.Graph().as_default():
	    features = {'a': [0.]}
	    net = df.DenseFeatures(fc.numeric_column('a'))(features)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    self.assertAllClose([[0.]], self.evaluate(net))

	def test_column_generator(self):
	  """A generator of columns is accepted as `feature_columns`."""
	  with ops.Graph().as_default():
	    features = {'a': [0.], 'b': [1.]}
	    columns = (fc.numeric_column(key) for key in features)
	    net = df.DenseFeatures(columns)(features)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    self.assertAllClose([[0., 1.]], self.evaluate(net))

	def test_raises_if_duplicate_name(self):
	  """Two columns with the same name raise ValueError."""
	  with self.assertRaisesRegexp(
	      ValueError, 'Duplicate feature column name found for columns'):
	    df.DenseFeatures(
	        feature_columns=[fc.numeric_column('a'),
	                         fc.numeric_column('a')])(
	                             features={
	                                 'a': [[0]]
	                             })

	def test_one_column(self):
	  """One scalar numeric column is returned unchanged."""
	  price = fc.numeric_column('price')
	  with ops.Graph().as_default():
	    features = {'price': [[1.], [5.]]}
	    net = df.DenseFeatures([price])(features)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    self.assertAllClose([[1.], [5.]], self.evaluate(net))

	def test_multi_dimension(self):
	  """A numeric column with shape=2 keeps both values per row."""
	  price = fc.numeric_column('price', shape=2)
	  with ops.Graph().as_default():
	    features = {'price': [[1., 2.], [5., 6.]]}
	    net = df.DenseFeatures([price])(features)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    self.assertAllClose([[1., 2.], [5., 6.]], self.evaluate(net))

	def test_compute_output_shape(self):
	  """`compute_output_shape` reports the summed widths of the columns."""
	  price1 = fc.numeric_column('price1', shape=2)
	  price2 = fc.numeric_column('price2', shape=4)
	  with ops.Graph().as_default():
	    features = {
	        'price1': [[1., 2.], [5., 6.]],
	        'price2': [[3., 4., 5., 6.], [7., 8., 9., 10.]]
	    }
	    dense_features = df.DenseFeatures([price1, price2])
	    self.assertEqual((None, 6), dense_features.compute_output_shape((None,)))
	    net = dense_features(features)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    self.assertAllClose([[1., 2., 3., 4., 5., 6.], [5., 6., 7., 8., 9., 10.]],
	                        self.evaluate(net))

	def test_raises_if_shape_mismatch(self):
	  """Width-1 input for a column declared with shape=2 is rejected."""
	  price_column = fc.numeric_column('price', shape=2)
	  expected_message = (
	      r'Cannot reshape a tensor with 2 elements to shape \[2,2\]')
	  with ops.Graph().as_default():
	    # Two examples with one value each, but the column expects two values.
	    inputs = {'price': [[1.], [5.]]}
	    with self.assertRaisesRegexp(Exception, expected_message):
	      df.DenseFeatures([price_column])(inputs)

	def test_reshaping(self):
	  """A column of shape [1, 2] comes out as two flat values per example."""
	  price_column = fc.numeric_column('price', shape=[1, 2])
	  with ops.Graph().as_default():
	    # Each example is a 1x2 matrix.
	    inputs = {'price': [[[1., 2.]], [[5., 6.]]]}
	    layer = df.DenseFeatures([price_column])
	    output = layer(inputs)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    expected = [[1., 2.], [5., 6.]]
	    self.assertAllClose(expected, self.evaluate(output))

	def test_multi_column(self):
	  """Outputs of several numeric columns are concatenated per example."""
	  wide_price = fc.numeric_column('price1', shape=2)
	  narrow_price = fc.numeric_column('price2')
	  with ops.Graph().as_default():
	    inputs = {
	        'price1': [[1., 2.], [5., 6.]],
	        'price2': [[3.], [4.]],
	    }
	    layer = df.DenseFeatures([wide_price, narrow_price])
	    output = layer(inputs)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    expected = [[1., 2., 3.], [5., 6., 4.]]
	    self.assertAllClose(expected, self.evaluate(output))

	def test_cols_to_output_tensors(self):
	  """The dict given as second call argument receives per-column tensors."""
	  wide_price = fc.numeric_column('price1', shape=2)
	  narrow_price = fc.numeric_column('price2')
	  with ops.Graph().as_default():
	    per_column_outputs = {}
	    inputs = {
	        'price1': [[1., 2.], [5., 6.]],
	        'price2': [[3.], [4.]],
	    }
	    layer = df.DenseFeatures([wide_price, narrow_price])
	    # `per_column_outputs` starts empty; it is indexed by column object
	    # after the call.
	    combined = layer(inputs, per_column_outputs)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    wide_output = self.evaluate(per_column_outputs[wide_price])
	    self.assertAllClose([[1., 2.], [5., 6.]], wide_output)
	    narrow_output = self.evaluate(per_column_outputs[narrow_price])
	    self.assertAllClose([[3.], [4.]], narrow_output)
	    self.assertAllClose([[1., 2., 3.], [5., 6., 4.]],
	                        self.evaluate(combined))

	def test_column_order(self):
	  """The output layout does not depend on the order columns are listed."""
	  column_a = fc.numeric_column('price_a')
	  column_b = fc.numeric_column('price_b')
	  with ops.Graph().as_default():
	    inputs = {
	        'price_a': [[1.]],
	        'price_b': [[3.]],
	    }
	    forward_output = df.DenseFeatures([column_a, column_b])(inputs)
	    reversed_output = df.DenseFeatures([column_b, column_a])(inputs)

	    self.evaluate(variables_lib.global_variables_initializer())
	    self.evaluate(lookup_ops.tables_initializer())

	    # Both layers are expected to produce the identical layout.
	    expected = [[1., 3.]]
	    self.assertAllClose(expected, self.evaluate(forward_output))
	    self.assertAllClose(expected, self.evaluate(reversed_output))

	def test_fails_for_categorical_column(self):
	  """A bare categorical column (not a DenseColumn) is rejected."""
	  animal_column = fc.categorical_column_with_identity(
	      'animal', num_buckets=4)
	  with ops.Graph().as_default():
	    animal_ids = sparse_tensor.SparseTensor(
	        indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2])
	    inputs = {'animal': animal_ids}
	    with self.assertRaisesRegexp(Exception, 'must be a .*DenseColumn'):
	      df.DenseFeatures([animal_column])(inputs)

	def test_static_batch_size_mismatch(self):
	  """Features whose literal batch sizes differ raise a ValueError."""
	  first_price = fc.numeric_column('price1')
	  second_price = fc.numeric_column('price2')
	  expected_message = (
	      r'Batch size \(first dimension\) of each feature must be same.')
	  with ops.Graph().as_default():
	    inputs = {
	        'price1': [[1.], [5.], [7.]],  # batch size 3
	        'price2': [[3.], [4.]],  # batch size 2
	    }
	    with self.assertRaisesRegexp(ValueError, expected_message):
	      df.DenseFeatures([first_price, second_price])(inputs)

	def test_subset_of_static_batch_size_mismatch(self):
	  """A mismatch among the statically shaped features still raises.

	  One feature is a shapeless placeholder; the other two are literals whose
	  batch sizes (2 and 3) disagree.
	  """
	  first_price = fc.numeric_column('price1')
	  second_price = fc.numeric_column('price2')
	  third_price = fc.numeric_column('price3')
	  expected_message = (
	      r'Batch size \(first dimension\) of each feature must be same.')
	  with ops.Graph().as_default():
	    inputs = {
	        # Created without a shape argument.
	        'price1': array_ops.placeholder(dtype=dtypes.int64),
	        'price2': [[3.], [4.]],  # batch size 2
	        'price3': [[3.], [4.], [5.]],  # batch size 3
	    }
	    with self.assertRaisesRegexp(ValueError, expected_message):
	      df.DenseFeatures([first_price, second_price, third_price])(inputs)

	def test_runtime_batch_size_mismatch(self):
	  """A batch-size mismatch that appears only when fed raises an OpError."""
	  first_price = fc.numeric_column('price1')
	  second_price = fc.numeric_column('price2')
	  with ops.Graph().as_default():
	    fed_price = array_ops.placeholder(dtype=dtypes.int64)
	    inputs = {
	        'price1': fed_price,  # fed with 3 rows below
	        'price2': [[3.], [4.]],  # batch size 2
	    }
	    output = df.DenseFeatures([first_price, second_price])(inputs)
	    three_rows = [[1.], [5.], [7.]]
	    with _initialized_session() as sess:
	      with self.assertRaisesRegexp(errors.OpError,
	                                   'Dimensions of inputs should match'):
	        sess.run(output, feed_dict={fed_price: three_rows})

	def test_runtime_batch_size_matches(self):
	  """Two placeholders fed with equally sized batches run without error."""
	  first_price = fc.numeric_column('price1')
	  second_price = fc.numeric_column('price2')
	  with ops.Graph().as_default():
	    first_input = array_ops.placeholder(dtype=dtypes.int64)
	    second_input = array_ops.placeholder(dtype=dtypes.int64)
	    inputs = {'price1': first_input, 'price2': second_input}
	    output = df.DenseFeatures([first_price, second_price])(inputs)
	    # Both feeds carry two rows; the test passes if `run` does not raise.
	    feeds = {
	        first_input: [[1.], [5.]],
	        second_input: [[1.], [5.]],
	    }
	    with _initialized_session() as sess:
	      sess.run(output, feed_dict=feeds)

	def test_multiple_layers_with_same_embedding_column(self):
	  """Two layers built on one embedding column each create a variable."""
	  hashed_column = fc.categorical_column_with_hash_bucket(
	      'sparse_feature', hash_bucket_size=5)
	  embedded_column = fc.embedding_column(hashed_column, dimension=10)

	  with ops.Graph().as_default():
	    inputs = {
	        'sparse_feature': [['a'], ['x']],
	    }
	    shared_columns = [embedded_column]
	    # Two independent layers over the very same column object.
	    df.DenseFeatures(shared_columns)(inputs)
	    df.DenseFeatures(shared_columns)(inputs)

	    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
	    # One embedding variable per layer is expected.
	    self.assertEqual(2, len(global_vars))
	    self.assertItemsEqual(
	        [
	            'dense_features/sparse_feature_embedding/embedding_weights:0',
	            'dense_features_1/sparse_feature_embedding/embedding_weights:0',
	        ],
	        [var.name for var in global_vars])

	@test_util.run_deprecated_v1
	def test_multiple_layers_with_same_shared_embedding_column(self):
	  """Two layers over shared embedding columns create a single variable."""
	  column_aaa = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
	  column_bbb = fc.categorical_column_with_identity(key='bbb', num_buckets=3)
	  embedding_dimension = 2
	  shared_b, shared_a = fc.shared_embedding_columns_v2(
	      [column_bbb, column_aaa], dimension=embedding_dimension)

	  with ops.Graph().as_default():
	    sparse_aaa = sparse_tensor.SparseTensor(
	        indices=((0, 0), (1, 0), (1, 1)),
	        values=(0, 1, 0),
	        dense_shape=(2, 2))
	    sparse_bbb = sparse_tensor.SparseTensor(
	        indices=((0, 0), (1, 0), (1, 1)),
	        values=(1, 2, 1),
	        dense_shape=(2, 2))
	    inputs = {'aaa': sparse_aaa, 'bbb': sparse_bbb}
	    shared_columns = [shared_a, shared_b]
	    df.DenseFeatures(shared_columns)(inputs)
	    df.DenseFeatures(shared_columns)(inputs)

	    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
	    # Both layers are expected to share one embedding variable.
	    self.assertEqual(1, len(global_vars))
	    self.assertItemsEqual(['aaa_bbb_shared_embedding:0'],
	                          [var.name for var in global_vars])

	@test_util.run_deprecated_v1
	def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self):
	  """Shared embedding columns reused in two graphs give one variable each."""
	  column_aaa = fc.categorical_column_with_identity(key='aaa', num_buckets=3)
	  column_bbb = fc.categorical_column_with_identity(key='bbb', num_buckets=3)
	  embedding_dimension = 2
	  shared_b, shared_a = fc.shared_embedding_columns_v2(
	      [column_bbb, column_aaa], dimension=embedding_dimension)
	  shared_columns = [shared_a, shared_b]

	  def _build_inputs():
	    # Must be called inside the graph that will consume the tensors.
	    return {
	        'aaa':
	            sparse_tensor.SparseTensor(
	                indices=((0, 0), (1, 0), (1, 1)),
	                values=(0, 1, 0),
	                dense_shape=(2, 2)),
	        'bbb':
	            sparse_tensor.SparseTensor(
	                indices=((0, 0), (1, 0), (1, 1)),
	                values=(1, 2, 1),
	                dense_shape=(2, 2)),
	    }

	  # First graph: exactly one variable is expected.
	  with ops.Graph().as_default():
	    first_inputs = _build_inputs()
	    df.DenseFeatures(shared_columns)(first_inputs)
	    first_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
	    self.assertEqual(1, len(first_vars))

	  # Second, fresh graph reusing the same column objects: again exactly one
	  # variable, carrying the shared embedding name.
	  with ops.Graph().as_default():
	    second_inputs = _build_inputs()
	    df.DenseFeatures(shared_columns)(second_inputs)
	    second_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
	    self.assertEqual(1, len(second_vars))
	    self.assertItemsEqual(['aaa_bbb_shared_embedding:0'],
	                          [var.name for var in second_vars])

	@test_util.run_deprecated_v1
	def test_with_1d_sparse_tensor(self):
	  """Rank-1 dense and sparse feature tensors are accepted by the layer."""
	  country_embeddings = (
	      (1., 2., 3., 4., 5.),  # id 0
	      (6., 7., 8., 9., 10.),  # id 1
	      (11., 12., 13., 14., 15.),  # id 2
	  )

	  def _initializer(shape, dtype, partition_info=None):
	    # Ignores every argument and always returns the fixed table above.
	    del shape, dtype, partition_info
	    return country_embeddings

	  # `price` contributes 1 value per example.
	  price = fc.numeric_column('price')

	  # `one_hot_body_style` contributes 3 values (one per vocabulary entry).
	  body_style = fc.categorical_column_with_vocabulary_list(
	      'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
	  one_hot_body_style = fc.indicator_column(body_style)

	  # `embedded_country` contributes 5 values (`dimension=5`).
	  country = fc.categorical_column_with_vocabulary_list(
	      'country', vocabulary_list=['US', 'JP', 'CA'])
	  embedded_country = fc.embedding_column(
	      country, dimension=5, initializer=_initializer)

	  # Every feature below is rank 1: two dense tensors and one sparse tensor.
	  price_tensor = constant_op.constant([
	      11.,
	      12.,
	  ])
	  body_style_tensor = sparse_tensor.SparseTensor(
	      indices=((0,), (1,)),
	      values=('sedan', 'hardtop'),
	      dense_shape=(2,))
	  # A dense tensor feeding a categorical column.
	  country_tensor = constant_op.constant(['CA', 'US'])
	  features = {
	      'price': price_tensor,
	      'body-style': body_style_tensor,
	      'country': country_tensor,
	  }
	  self.assertEqual(1, features['price'].shape.ndims)
	  self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0])
	  self.assertEqual(1, features['country'].shape.ndims)

	  layer = df.DenseFeatures([price, one_hot_body_style, embedded_country])
	  net = layer(features)
	  self.assertEqual(1 + 3 + 5, net.shape[1])

	  # Each expected row is `one_hot_body_style` (3 values), then
	  # `embedded_country` (5 values), then `price` (1 value).
	  expected = [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
	              [1., 0., 0., 1., 2., 3., 4., 5., 12.]]
	  with _initialized_session() as sess:
	    self.assertAllEqual(expected, sess.run(net))

	@test_util.run_deprecated_v1
	def test_with_1d_unknown_shape_sparse_tensor(self):
	  """Placeholders of statically unknown rank work once data is fed."""
	  country_embeddings = (
	      (1., 2.),  # id 0
	      (6., 7.),  # id 1
	      (11., 12.),  # id 2
	  )

	  def _initializer(shape, dtype, partition_info=None):
	    # Ignores every argument and always returns the fixed table above.
	    del shape, dtype, partition_info
	    return country_embeddings

	  # `price` contributes 1 value per example.
	  price = fc.numeric_column('price')

	  # `one_hot_body_style` contributes 3 values (one per vocabulary entry).
	  body_style = fc.categorical_column_with_vocabulary_list(
	      'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
	  one_hot_body_style = fc.indicator_column(body_style)

	  # `embedded_country` contributes 2 values (`dimension=2`).
	  country = fc.categorical_column_with_vocabulary_list(
	      'country', vocabulary_list=['US', 'JP', 'CA'])
	  embedded_country = fc.embedding_column(
	      country, dimension=2, initializer=_initializer)

	  # No shape is given to any placeholder, so the rank is unknown until fed.
	  price_input = array_ops.placeholder(dtypes.float32)
	  body_style_input = array_ops.sparse_placeholder(dtypes.string)
	  # A dense placeholder feeding a categorical column.
	  country_input = array_ops.placeholder(dtypes.string)
	  features = {
	      'price': price_input,
	      'body-style': body_style_input,
	      'country': country_input,
	  }
	  self.assertIsNone(features['price'].shape.ndims)
	  self.assertIsNone(features['body-style'].get_shape().ndims)
	  self.assertIsNone(features['country'].shape.ndims)

	  price_data = np.array([11., 12.])
	  body_style_data = sparse_tensor.SparseTensorValue(
	      indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,))
	  # Note that the country values are fed as a 2x1 array.
	  country_data = np.array([['US'], ['CA']])

	  layer = df.DenseFeatures([price, one_hot_body_style, embedded_country])
	  net = layer(features)
	  self.assertEqual(1 + 3 + 2, net.shape[1])

	  feeds = {
	      price_input: price_data,
	      body_style_input: body_style_data,
	      country_input: country_data,
	  }
	  # Each expected row is `one_hot_body_style` (3 values), then
	  # `embedded_country` (2 values), then `price` (1 value).
	  expected = [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]]
	  with _initialized_session() as sess:
	    self.assertAllEqual(expected, sess.run(net, feed_dict=feeds))

	@test_util.run_deprecated_v1
	def test_with_rank_0_feature(self):
	  """A rank-0 feature is rejected both statically and at run time."""
	  # `price` contributes 1 value per example.
	  price = fc.numeric_column('price')

	  # Case 1: the rank is known to be 0 when the layer is called.
	  scalar_features = {
	      'price': constant_op.constant(0),
	  }
	  self.assertEqual(0, scalar_features['price'].shape.ndims)
	  with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'):
	    df.DenseFeatures([price])(scalar_features)

	  # Case 2: the rank is unknown until a scalar is fed to the placeholder.
	  price_input = array_ops.placeholder(dtypes.float32)
	  dynamic_features = {
	      'price': price_input,
	  }
	  net = df.DenseFeatures([price])(dynamic_features)
	  self.assertEqual(1, net.shape[1])
	  with _initialized_session() as sess:
	    with self.assertRaisesOpError('Feature .* cannot have rank 0'):
	      sess.run(net, feed_dict={price_input: np.array(1)})


	# Script entry point: hand control to the TensorFlow test runner only when
	# this file is executed directly, not when it is imported.
	if __name__ == '__main__':
	  test.main()