| # Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| r"""Runs a trained audio graph against WAVE files and reports the results. |
| |
| The model, labels and .wav files specified in the arguments will be loaded, and |
| then the predictions from running the model against the audio data will be |
| printed to the console. This is a useful script for sanity checking trained |
| models, and as an example of how to use an audio model from Python. |
| |
| Here's an example of running it: |
| |
| python tensorflow/examples/speech_commands/label_wav_dir.py \ |
| --graph=/tmp/my_frozen_graph.pb \ |
| --labels=/tmp/speech_commands_train/conv_labels.txt \ |
| --wav_dir=/tmp/speech_dataset/left |
| |
| """ |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
import argparse
import glob
import os.path
import sys
| |
| import tensorflow as tf |
| |
| # pylint: disable=unused-import |
| from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio |
| # pylint: enable=unused-import |
| |
| FLAGS = None |
| |
| |
def load_graph(filename):
  """Loads a frozen GraphDef from `filename` into the default graph."""
  with tf.io.gfile.GFile(filename, 'rb') as graph_file:
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(graph_file.read())
  # Import with an empty name scope so node names match the frozen graph.
  tf.import_graph_def(graph_def, name='')
| |
| |
def load_labels(filename):
  """Reads the labels file and returns one stripped label per line."""
  labels = []
  for line in tf.io.gfile.GFile(filename):
    labels.append(line.rstrip())
  return labels
| |
| |
def run_graph(wav_dir, labels, input_layer_name, output_layer_name,
              num_top_predictions):
  """Runs every .wav file in `wav_dir` through the graph and prints predictions.

  Args:
    wav_dir: Directory containing .wav files to classify.
    labels: List of label strings, indexed by the model's output class id.
    input_layer_name: Name of the WAVE data input tensor in the graph.
    output_layer_name: Name of the softmax output tensor in the graph.
    num_top_predictions: How many top-scoring labels to print per file.

  Returns:
    0 on completion.
  """
  with tf.compat.v1.Session() as sess:
    # The output tensor is the same for every file, so look it up once
    # instead of once per .wav file inside the loop.
    softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name)
    for wav_path in glob.glob(wav_dir + '/*.wav'):
      if not wav_path or not tf.io.gfile.exists(wav_path):
        tf.compat.v1.logging.fatal('Audio file does not exist %s', wav_path)

      with open(wav_path, 'rb') as wav_file:
        wav_data = wav_file.read()

      # predictions is a one-dimensional array with one score per class.
      predictions, = sess.run(softmax_tensor, {input_layer_name: wav_data})

      # Sort to show labels in order of confidence.
      print('\n%s' % (os.path.basename(wav_path)))
      top_k = predictions.argsort()[-num_top_predictions:][::-1]
      for node_id in top_k:
        human_string = labels[node_id]
        score = predictions[node_id]
        print('%s (score = %.5f)' % (human_string, score))

    return 0
| |
| |
def label_wav(wav_dir, labels, graph, input_name, output_name, how_many_labels):
  """Loads the model and labels, then runs inference over the wav directory."""
  # Validate inputs up front so failures point at the bad path.
  if not labels or not tf.io.gfile.exists(labels):
    tf.compat.v1.logging.fatal('Labels file does not exist %s', labels)
  if not graph or not tf.io.gfile.exists(graph):
    tf.compat.v1.logging.fatal('Graph file does not exist %s', graph)

  # Importing the frozen graph registers it in the default graph, which the
  # session created inside run_graph will pick up.
  load_graph(graph)
  labels_list = load_labels(labels)

  run_graph(wav_dir, labels_list, input_name, output_name, how_many_labels)
| |
| |
def main(_):
  """Entry point for the script: forwards parsed flags to label_wav."""
  label_wav(
      FLAGS.wav_dir,
      FLAGS.labels,
      FLAGS.graph,
      FLAGS.input_name,
      FLAGS.output_name,
      FLAGS.how_many_labels)
| |
| |
if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  # The flag names a directory of .wav files (see run_graph's glob), not a
  # single audio file, so the help text must say so.
  parser.add_argument(
      '--wav_dir',
      type=str,
      default='',
      help='Directory of .wav audio files to be identified.')
  parser.add_argument(
      '--graph', type=str, default='', help='Model to use for identification.')
  parser.add_argument(
      '--labels', type=str, default='', help='Path to file containing labels.')
  parser.add_argument(
      '--input_name',
      type=str,
      default='wav_data:0',
      help='Name of WAVE data input node in model.')
  parser.add_argument(
      '--output_name',
      type=str,
      default='labels_softmax:0',
      help='Name of node outputting a prediction in the model.')
  parser.add_argument(
      '--how_many_labels',
      type=int,
      default=3,
      help='Number of results to show.')

  FLAGS, unparsed = parser.parse_known_args()
  tf.compat.v1.app.run(main=main, argv=[sys.argv[0]] + unparsed)