| /* |
| * Copyright (C) 2019 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.android.car.assist.client.tts; |
| |
| import android.content.Context; |
| import android.media.AudioAttributes; |
| import android.media.AudioFocusRequest; |
| import android.media.AudioManager; |
| import android.os.Handler; |
| import android.speech.tts.TextToSpeech; |
| import android.speech.tts.UtteranceProgressListener; |
| import android.util.Log; |
| import android.util.Pair; |
| |
| import androidx.annotation.VisibleForTesting; |
| |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.concurrent.TimeUnit; |
| import java.util.function.BiConsumer; |
| |
| /** |
| * Component that wraps platform TTS engine and supports play-out of batches of text. |
| * <p> |
| * It takes care of setting up TTS Engine when text is played out and shutting it down after an idle |
| * period with no play-out. This is desirable since the owning app is long-lived and the TTS Engine |
| * brings up another service-process. |
| * <p> |
| * As batches of text are played-out, they issue callbacks on the {@link Listener} provided with the |
| * batch. |
| */ |
| public class TextToSpeechHelper { |
| /** |
| * Listener interface used by clients to be notified as batch of text is played out. |
| */ |
| public interface Listener { |
| /** |
| * Called when play-out starts for batch. May never get called if batch has errors or |
| * interruptions. |
| */ |
| void onTextToSpeechStarted(long requestId); |
| |
| /** |
| * Called when play-out ends for batch. |
| * |
| * @param error Whether play-out ended due to an error or not. Note: if it was aborted, it's |
| * not considered an error. |
| */ |
| void onTextToSpeechStopped(long requestId, boolean error); |
| } |
| |
| private static final String TAG = "CM#TextToSpeechHelper"; |
| |
| private static final String UTTERANCE_ID_SEPARATOR = ";"; |
| private static final long DEFAULT_SHUTDOWN_DELAY_MILLIS = TimeUnit.MINUTES.toMillis(1); |
| |
| private final Map<String, BatchListener> mListeners = new HashMap<>(); |
| private final Handler mHandler = new Handler(); |
| private final Context mContext; |
| private final TextToSpeechHelper.Listener mListener; |
| private final AudioManager.OnAudioFocusChangeListener mNoOpListener = (f) -> { /* NO-OP */ }; |
| private final AudioManager mAudioManager; |
| private final AudioAttributes mAudioAttributes; |
| private final AudioFocusRequest mAudioFocusRequest; |
| private final long mShutdownDelayMillis; |
| private TextToSpeechEngine mTextToSpeechEngine; |
| private int mInitStatus; |
| private SpeechRequest mPendingRequest; |
| private String mCurrentBatchId; |
| |
| private final Runnable mMaybeShutdownRunnable = new Runnable() { |
| @Override |
| public void run() { |
| if (mListeners.isEmpty() || mPendingRequest == null) { |
| shutdownEngine(); |
| } else { |
| mHandler.postDelayed(this, mShutdownDelayMillis); |
| } |
| } |
| }; |
| |
| public TextToSpeechHelper(Context context, TextToSpeechHelper.Listener listener) { |
| this(context, new AndroidTextToSpeechEngine(), DEFAULT_SHUTDOWN_DELAY_MILLIS, listener); |
| } |
| |
| @VisibleForTesting |
| TextToSpeechHelper(Context context, TextToSpeechEngine ttsEngine, long shutdownDelayMillis, |
| TextToSpeechHelper.Listener listener) { |
| mContext = context; |
| mAudioManager = (AudioManager) mContext.getSystemService(Context.AUDIO_SERVICE); |
| mTextToSpeechEngine = ttsEngine; |
| mShutdownDelayMillis = shutdownDelayMillis; |
| // OnInitListener will only set to SUCCESS/ERROR. So we initialize to STOPPED. |
| mInitStatus = TextToSpeech.STOPPED; |
| mListener = listener; |
| mAudioAttributes = new AudioAttributes.Builder() |
| .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) |
| .setUsage(AudioAttributes.USAGE_ASSISTANT) |
| .build(); |
| mAudioFocusRequest = new AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT) |
| .setAudioAttributes(mAudioAttributes) |
| .setOnAudioFocusChangeListener(mNoOpListener) |
| .build(); |
| } |
| |
| private void maybeInitAndKeepAlive() { |
| if (!mTextToSpeechEngine.isInitialized()) { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, "Initializing TTS Engine"); |
| } |
| mTextToSpeechEngine.initialize(mContext, this::handleInitCompleted); |
| mTextToSpeechEngine.setOnUtteranceProgressListener(mProgressListener); |
| mTextToSpeechEngine.setAudioAttributes(mAudioAttributes); |
| } |
| // Since we're handling a request, delay engine shutdown. |
| mHandler.removeCallbacks(mMaybeShutdownRunnable); |
| mHandler.postDelayed(mMaybeShutdownRunnable, mShutdownDelayMillis); |
| } |
| |
| private void handleInitCompleted(int initStatus) { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, String.format("Init completed. Status: %d", initStatus)); |
| } |
| mInitStatus = initStatus; |
| if (mPendingRequest != null) { |
| playInternal(mPendingRequest.mTextToSpeak, mPendingRequest.mRequestId); |
| mPendingRequest = null; |
| } |
| } |
| |
| /** |
| * Plays out given batch of text. If engine is not active, it is setup and the request is stored |
| * until then. Only one batch is supported at a time; If a previous batch is waiting engine |
| * setup, that batch is dropped. If a previous batch is playing, the play-out is stopped and |
| * next one is passed to the TTS Engine. Callbacks are issued on the provided {@code listener}. |
| * Will request audio focus first, failure will trigger onAudioFocusFailed in listener. |
| * <p/> |
| * NOTE: Underlying engine may have limit on length of text in each element of the batch; it |
| * will reject anything longer. See {@link TextToSpeech#getMaxSpeechInputLength()}. |
| * |
| * @param textToSpeak Batch of text to play-out. |
| * @param requestId The tracking request id |
| * @return true if the request to play was successful |
| */ |
| public boolean requestPlay(List<CharSequence> textToSpeak, long requestId) { |
| if (textToSpeak.isEmpty()) { |
| /* no-op */ |
| return true; |
| } |
| int result = mAudioManager.requestAudioFocus(mAudioFocusRequest); |
| if (result != AudioManager.AUDIOFOCUS_REQUEST_GRANTED) { |
| return false; |
| } |
| maybeInitAndKeepAlive(); |
| |
| // Check if its still initializing. |
| if (mInitStatus == TextToSpeech.STOPPED) { |
| // Squash any already queued request. |
| if (mPendingRequest != null) { |
| onTtsStopped(requestId, /* error= */ false); |
| } |
| mPendingRequest = new SpeechRequest(textToSpeak, requestId); |
| } else { |
| playInternal(textToSpeak, requestId); |
| } |
| return true; |
| } |
| |
| /** Requests that all play-out be stopped. */ |
| public void requestStop() { |
| mTextToSpeechEngine.stop(); |
| mCurrentBatchId = null; |
| } |
| |
| public boolean isSpeaking() { |
| return mTextToSpeechEngine.isSpeaking(); |
| } |
| |
| // wrap call back to listener.onTextToSpeechStopped with adandonAudioFocus. |
| private void onTtsStopped(long requestId, boolean error) { |
| mAudioManager.abandonAudioFocusRequest(mAudioFocusRequest); |
| mHandler.post(() -> mListener.onTextToSpeechStopped(requestId, error)); |
| } |
| |
| private void playInternal(List<CharSequence> textToSpeak, long requestId) { |
| if (mInitStatus == TextToSpeech.ERROR) { |
| Log.e(TAG, "TTS setup failed!"); |
| onTtsStopped(requestId, /* error= */ true); |
| return; |
| } |
| |
| // Abort anything currently playing and flushes queue. |
| mTextToSpeechEngine.stop(); |
| |
| // Queue up new batch. We assign id's = "batchId;index" where index increments from 0 |
| // to batchSize - 1. If queueing fails, we abort the whole batch. |
| mCurrentBatchId = Long.toString(requestId); |
| for (int i = 0; i < textToSpeak.size(); i++) { |
| CharSequence text = textToSpeak.get(i); |
| String utteranceId = |
| String.format("%s%s%d", mCurrentBatchId, UTTERANCE_ID_SEPARATOR, i); |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, String.format("Queueing tts: '%s' [%s]", text, utteranceId)); |
| } |
| if (mTextToSpeechEngine.speak(text, TextToSpeech.QUEUE_ADD, /* params= */ null, |
| utteranceId) != TextToSpeech.SUCCESS) { |
| mTextToSpeechEngine.stop(); |
| mCurrentBatchId = null; |
| Log.e(TAG, "Queuing text failed!"); |
| onTtsStopped(requestId, /* error= */ true); |
| return; |
| } |
| } |
| // Register BatchListener for entire batch. Will invoke callbacks on Listener as batch |
| // progresses. |
| mListeners.put(mCurrentBatchId, new BatchListener(requestId, textToSpeak.size())); |
| } |
| |
| /** |
| * Releases resources and shuts down TTS Engine. |
| */ |
| public void cleanup() { |
| mHandler.removeCallbacksAndMessages(/* token= */ null); |
| shutdownEngine(); |
| } |
| |
| /** Returns the stream used by the TTS engine. */ |
| public int getStream() { |
| return mTextToSpeechEngine.getStream(); |
| } |
| |
| private void shutdownEngine() { |
| if (mTextToSpeechEngine.isInitialized()) { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, "Shutting down TTS Engine"); |
| } |
| mTextToSpeechEngine.stop(); |
| mTextToSpeechEngine.shutdown(); |
| mInitStatus = TextToSpeech.STOPPED; |
| } |
| } |
| |
| private static Pair<String, Integer> parse(String utteranceId) { |
| try { |
| String[] pair = utteranceId.split(UTTERANCE_ID_SEPARATOR); |
| String batchId = pair[0]; |
| int index = Integer.valueOf(pair[1]); |
| return Pair.create(batchId, index); |
| } catch (IndexOutOfBoundsException | NumberFormatException e) { |
| throw new IllegalArgumentException( |
| String.format("Utterance ID is invalid: %s.", utteranceId) |
| ); |
| } |
| } |
| |
| // Handles all callbacks from TextToSpeechEngine. Possible order of callbacks: |
| // - onStart, onDone: successful play-out. |
| // - onStart, onStop: play-out starts, but interrupted. |
| // - onStart, onError: play-out starts and fails. |
| // - onStop: play-out never starts, but aborted. |
| // - onError: play-out never starts, but fails. |
| // Since the callbacks arrive on other threads, they are dispatched onto mHandler where the |
| // appropriate BatchListener is invoked. |
| private final UtteranceProgressListener mProgressListener = new UtteranceProgressListener() { |
| private void safeInvokeAsync(String utteranceId, |
| BiConsumer<BatchListener, Pair<String, Integer>> callback) { |
| mHandler.post(() -> { |
| Pair<String, Integer> parsedId = parse(utteranceId); |
| BatchListener listener = mListeners.get(parsedId.first); |
| if (listener != null) { |
| callback.accept(listener, parsedId); |
| } else { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, "Missing batch listener: " + utteranceId); |
| } |
| } |
| }); |
| } |
| |
| @Override |
| public void onStart(String utteranceId) { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, "TTS onStart: " + utteranceId); |
| } |
| mHandler.post(() -> { |
| Pair<String, Integer> parsedId = parse(utteranceId); |
| BatchListener listener = mListeners.get(parsedId.first); |
| if (listener != null) { |
| listener.onStart(); |
| } else { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, "Missing batch listener: " + utteranceId); |
| } |
| } |
| }); |
| } |
| |
| @Override |
| public void onDone(String utteranceId) { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, "TTS onDone: " + utteranceId); |
| } |
| safeInvokeAsync(utteranceId, BatchListener::onDone); |
| } |
| |
| @Override |
| public void onStop(String utteranceId, boolean interrupted) { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, "TTS onStop: " + utteranceId); |
| } |
| safeInvokeAsync(utteranceId, BatchListener::onStop); |
| } |
| |
| @Override |
| public void onError(String utteranceId) { |
| if (Log.isLoggable(TAG, Log.DEBUG)) { |
| Log.d(TAG, "TTS onError: " + utteranceId); |
| } |
| safeInvokeAsync(utteranceId, BatchListener::onError); |
| } |
| }; |
| |
| /** |
| * Handles callbacks for a single batch of TTS text and issues callbacks on wrapped |
| * {@link Listener} that client is listening on. |
| */ |
| private class BatchListener { |
| private boolean mBatchStarted; |
| private final long mRequestId; |
| private final int mUtteranceCount; |
| |
| BatchListener(long requestId, int utteranceCount) { |
| mRequestId = requestId; |
| mUtteranceCount = utteranceCount; |
| } |
| |
| // Issues Listener.onTextToSpeechStarted when first item of batch starts. |
| void onStart() { |
| if (!mBatchStarted) { |
| mBatchStarted = true; |
| mListener.onTextToSpeechStarted(mRequestId); |
| } |
| } |
| |
| // Issues Listener.onTextToSpeechStopped when last item of batch finishes. |
| void onDone(Pair<String, Integer> parsedId) { |
| // parseId is zero-indexed, mUtteranceCount is not. |
| if (parsedId.second == (mUtteranceCount - 1)) { |
| handleBatchFinished(parsedId, /* error= */ false); |
| } |
| } |
| |
| // If any item of batch fails, abort the batch and issue Listener.onTextToSpeechStopped. |
| void onError(Pair<String, Integer> parsedId) { |
| if (parsedId.first.equals(mCurrentBatchId)) { |
| mTextToSpeechEngine.stop(); |
| } |
| handleBatchFinished(parsedId, /* error= */ true); |
| } |
| |
| // If any item of batch is preempted (rest should also be), |
| // issue Listener.onTextToSpeechStopped. |
| void onStop(Pair<String, Integer> parsedId) { |
| handleBatchFinished(parsedId, /* error= */ false); |
| } |
| |
| // Handles terminal callbacks for the batch. We invoke stopped and remove ourselves. |
| // No further callbacks will be handled for the batch. |
| private void handleBatchFinished(Pair<String, Integer> parsedId, boolean error) { |
| onTtsStopped(mRequestId, error); |
| mListeners.remove(parsedId.first); |
| } |
| } |
| |
| private static class SpeechRequest { |
| final List<CharSequence> mTextToSpeak; |
| final long mRequestId; |
| |
| SpeechRequest(List<CharSequence> textToSpeak, long requestId) { |
| mTextToSpeak = textToSpeak; |
| mRequestId = requestId; |
| } |
| } |
| } |