/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.android.voicedialer;

import android.app.Activity;
import android.content.Intent;
import android.speech.srec.MicrophoneInputStream;
import android.speech.srec.Recognizer;
import android.speech.srec.WaveHeader;
import android.util.Log;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
/**
 * This class is a framework for recognizing speech. It must be extended to
 * be used; the child class must implement setupGrammar and
 * onRecognitionSuccess.
 * A usage cycle is as follows:
 * <ul>
 * <li>Create with a reference to the {@link VoiceDialerActivity}.
 * <li>Signal the user to start speaking with the Vibrator or a beep.
 * <li>Start audio input by creating a {@link MicrophoneInputStream}.
 * <li>Create and configure a {@link Recognizer}.
 * <li>Set up the grammar using setupGrammar.
 * <li>Start the {@link Recognizer} running using data already being
 * collected by the microphone.
 * <li>Wait for the {@link Recognizer} to complete.
 * <li>Process the results using onRecognitionSuccess, which will pass
 * a list of intents to the {@link RecognizerClient}.
 * <li>Shut down and clean up.
 * </ul>
 * Notes:
 * <ul>
 * <li>Audio may be read from a file.
 * <li>A directory tree of audio files may be stepped through.
 * <li>A contact list may be read from a file.
 * <li>A {@link RecognizerLogger} may generate a set of log files from
 * a recognition session.
 * <li>A static instance of this class is held and reused by the
 * {@link VoiceDialerActivity}, which saves setup time.
 * </ul>
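 * <p>As a rough sketch of what a child class supplies (the grammar path,
 * result handling, and the RecognizerClient callback shown here are
 * illustrative assumptions, not prescribed by this class):
 * <pre>
 * public class MyRecognizerEngine extends RecognizerEngine {
 *     protected void setupGrammar() throws IOException, InterruptedException {
 *         if (mSrecGrammar == null) {
 *             // load a precompiled grammar and bind it to the Recognizer
 *             mSrecGrammar = mSrec.new Grammar(SREC_DIR + "/grammars/VoiceDialer.g2g");
 *             mSrecGrammar.setupRecognizer();
 *         }
 *     }
 *
 *     protected void onRecognitionSuccess(RecognizerClient recognizerClient)
 *             throws InterruptedException {
 *         // read results from mSrec, build Intents, and hand them over
 *         ArrayList&lt;Intent&gt; intents = new ArrayList&lt;Intent&gt;();
 *         Intent intent = new Intent(ACTION_RECOGNIZER_RESULT);
 *         intent.putExtra(SENTENCE_EXTRA,
 *                 mSrec.getResult(0, Recognizer.KEY_LITERAL));
 *         addIntent(intents, intent);
 *         recognizerClient.onRecognitionSuccess(
 *                 intents.toArray(new Intent[intents.size()]));
 *     }
 * }
 * </pre>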
 */
abstract public class RecognizerEngine {

    protected static final String TAG = "RecognizerEngine";

    // Intent action and extras used to carry recognition results
    protected static final String ACTION_RECOGNIZER_RESULT =
            "com.android.voicedialer.ACTION_RECOGNIZER_RESULT";
    public static final String SENTENCE_EXTRA = "sentence";
    public static final String SEMANTIC_EXTRA = "semantic";

    protected final String SREC_DIR = Recognizer.getConfigDir(null);

    protected static final String OPEN_ENTRIES = "openentries.txt";

    protected static final int RESULT_LIMIT = 5;

    protected Activity mActivity;
    protected Recognizer mSrec;
    protected Recognizer.Grammar mSrecGrammar;
    protected RecognizerLogger mLogger;
    protected int mSampleRate;

    /**
     * Constructor.
     */
    public RecognizerEngine() {
        mSampleRate = 0;
    }
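
    /**
     * Create and configure the {@link Recognizer.Grammar} for this engine,
     * typically storing it in mSrecGrammar. Implemented by the child class;
     * called from recognize() after the Recognizer exists but before
     * recognition starts.
     */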
    abstract protected void setupGrammar() throws IOException, InterruptedException;
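
    /**
     * Process a successful recognition. Implemented by the child class;
     * called from recognize() when the Recognizer reports
     * EVENT_RECOGNITION_RESULT, and expected to deliver a list of Intents
     * built from the results to the given RecognizerClient.
     */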
    abstract protected void onRecognitionSuccess(RecognizerClient recognizerClient)
            throws InterruptedException;

    /**
     * Start the recognition process.
     *
     * <ul>
     * <li>Create and start the microphone.
     * <li>Create a Recognizer.
     * <li>Set up the grammar (implementation is in the child class).
     * <li>Start the Recognizer.
     * <li>Feed the Recognizer audio until it provides a result.
     * <li>Build a list of Intents corresponding to the results
     * (implementation is in the child class).
     * <li>Stop the microphone.
     * <li>Stop the Recognizer.
     * </ul>
     *
     * @param recognizerClient client to be given the results.
     * @param activity the Activity this recognition is being run from.
     * @param micFile optional audio input from this file, or directory tree.
     * @param sampleRate sample rate of the audio, whether it comes from the
     *        microphone or from micFile.
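     *
     * <p>A rough usage sketch (the thread wrapper and member names here are
     * hypothetical, not part of this class):
     * <pre>
     * // run recognition off the UI thread; passing null for micFile
     * // makes the engine open a live MicrophoneInputStream
     * new Thread() {
     *     public void run() {
     *         mEngine.recognize(mRecognizerClient, VoiceDialerActivity.this,
     *                 null, 11025);
     *     }
     * }.start();
     * </pre>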
     */
    public void recognize(RecognizerClient recognizerClient, Activity activity,
            File micFile, int sampleRate) {
        InputStream mic = null;
        boolean recognizerStarted = false;
        try {
            mActivity = activity;

            // set up logger
            mLogger = null;
            if (RecognizerLogger.isEnabled(mActivity)) {
                mLogger = new RecognizerLogger(mActivity);
            }

            if (mSampleRate != sampleRate) {
                // sample rate has changed since we last used this
                // RecognizerEngine; destroy the grammar and regenerate it.
                if (mSrecGrammar != null) {
                    mSrecGrammar.destroy();
                }
                mSrecGrammar = null;
                mSampleRate = sampleRate;
            }

            // create a new recognizer
            if (false) Log.d(TAG, "start new Recognizer");
            if (mSrec == null) {
                String parFilePath = SREC_DIR + "/baseline11k.par";
                if (sampleRate == 8000) {
                    parFilePath = SREC_DIR + "/baseline8k.par";
                }
                mSrec = new Recognizer(parFilePath);
            }

            // start audio input
            if (micFile != null) {
                if (false) Log.d(TAG, "using mic file");
                mic = new FileInputStream(micFile);
                WaveHeader hdr = new WaveHeader();
                hdr.read(mic);
            } else {
                if (false) Log.d(TAG, "start new MicrophoneInputStream");
                mic = new MicrophoneInputStream(sampleRate, sampleRate * 15);
            }

            // notify UI
            recognizerClient.onMicrophoneStart(mic);

            // log audio if requested
            if (mLogger != null) mic = mLogger.logInputStream(mic, sampleRate);

            setupGrammar();

            // start the recognition process
            if (false) Log.d(TAG, "start mSrec.start");
            mSrec.start();
            recognizerStarted = true;

            // recognize
            while (true) {
                if (Thread.interrupted()) throw new InterruptedException();
                int event = mSrec.advance();
                if (event != Recognizer.EVENT_INCOMPLETE &&
                        event != Recognizer.EVENT_NEED_MORE_AUDIO) {
                    Log.d(TAG, "start advance()=" +
                            Recognizer.eventToString(event) +
                            " avail " + mic.available());
                }
                switch (event) {
                    case Recognizer.EVENT_INCOMPLETE:
                    case Recognizer.EVENT_STARTED:
                    case Recognizer.EVENT_START_OF_VOICING:
                    case Recognizer.EVENT_END_OF_VOICING:
                        continue;
                    case Recognizer.EVENT_RECOGNITION_RESULT:
                        onRecognitionSuccess(recognizerClient);
                        break;
                    case Recognizer.EVENT_NEED_MORE_AUDIO:
                        mSrec.putAudio(mic);
                        continue;
                    default:
                        Log.d(TAG, "unknown event " + event);
                        recognizerClient.onRecognitionFailure(
                                Recognizer.eventToString(event));
                        break;
                }
                break;
            }

        } catch (InterruptedException e) {
            if (false) Log.d(TAG, "start interrupted " + e);
            recognizerClient.onRecognitionError(e.toString());
        } catch (IOException e) {
            if (false) Log.d(TAG, "start new Srec failed " + e);
            recognizerClient.onRecognitionError(e.toString());
        } catch (Exception e) {
            if (false) Log.d(TAG, "exception " + e);
            recognizerClient.onRecognitionError(e.toString());
        } finally {
            if (false) Log.d(TAG, "start mSrec.stop");
            if (mSrec != null && recognizerStarted) mSrec.stop();

            // stop microphone
            try {
                if (mic != null) mic.close();
            } catch (IOException ex) {
                if (false) Log.d(TAG, "start - mic.close failed - " + ex);
            }
            mic = null;

            // close logger
            try {
                if (mLogger != null) mLogger.close();
            } catch (IOException ex) {
                if (false) Log.d(TAG, "start - mLogger.close failed - " + ex);
            }
            mLogger = null;
        }
        if (false) Log.d(TAG, "start bye");
    }
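
    /**
     * Append an Intent to a list of Intents, skipping it if an Intent with
     * the same action and data is already in the list. FLAG_ACTIVITY_NEW_TASK
     * is set on every Intent that is added.
     *
     * @param intents the list of Intents to add to.
     * @param intent the Intent to add, if not a duplicate.
     */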
    protected static void addIntent(ArrayList<Intent> intents, Intent intent) {
        for (Intent in : intents) {
            if (in.getAction() != null &&
                    in.getAction().equals(intent.getAction()) &&
                    in.getData() != null &&
                    in.getData().equals(intent.getData())) {
                return;
            }
        }
        intent.setFlags(intent.getFlags() | Intent.FLAG_ACTIVITY_NEW_TASK);
        intents.add(intent);
    }
}