// blob: f5e5f4d8356ca57f92061170e4c6eaf10e821f6b [file] [log] [blame]
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package android.speech.tts;
import android.annotation.IntDef;
import android.annotation.IntRange;
import android.media.AudioFormat;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
/**
* A callback to return speech data synthesized by a text to speech engine.
*
* The engine can provide streaming audio by calling
* {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally
* {@link #done}.
*
* {@link #error} can be called at any stage in the synthesis process to
* indicate that an error has occurred, but if the call is made after a call
* to {@link #done}, it might be discarded.
*
* {@link #done} must be called at the end of synthesis, regardless of errors.
*
* All methods can only be called on the synthesis thread.
*/
public interface SynthesisCallback {
    /**
     * Restricts an {@code int} to the PCM encodings accepted by {@link #start}:
     * {@link AudioFormat#ENCODING_PCM_8BIT}, {@link AudioFormat#ENCODING_PCM_16BIT} or
     * {@link AudioFormat#ENCODING_PCM_FLOAT}. Retained in source only.
     *
     * @hide
     */
    @Retention(RetentionPolicy.SOURCE)
    @IntDef({
        AudioFormat.ENCODING_PCM_8BIT,
        AudioFormat.ENCODING_PCM_16BIT,
        AudioFormat.ENCODING_PCM_FLOAT
    })
    @interface SupportedAudioFormat {}

    /**
     * Returns the upper bound on the size of a single {@link #audioAvailable} call.
     *
     * @return the maximum number of bytes that the TTS engine can pass in a single call of
     *     {@link #audioAvailable}. Calls to {@link #audioAvailable} with data lengths larger
     *     than this value will not succeed.
     */
    int getMaxBufferSize();

    /**
     * The service should call this when it starts to synthesize audio for this request.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param sampleRateInHz Sample rate in Hz of the generated audio.
     * @param audioFormat Audio format of the generated audio. Must be one of {@link
     *     AudioFormat#ENCODING_PCM_8BIT} or {@link AudioFormat#ENCODING_PCM_16BIT}. Can also be
     *     {@link AudioFormat#ENCODING_PCM_FLOAT} when targeting Android N and above.
     * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link
     *     android.speech.tts.TextToSpeech#STOPPED}.
     */
    int start(
            int sampleRateInHz,
            @SupportedAudioFormat int audioFormat,
            @IntRange(from = 1, to = 2) int channelCount);

    /**
     * The service should call this method when synthesized audio is ready for consumption.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param buffer The generated audio data. This method will not hold on to {@code buffer},
     *     so the caller is free to modify it after this method returns.
     * @param offset The offset into {@code buffer} where the audio data starts.
     * @param length The number of bytes of audio data in {@code buffer}. This must be less than
     *     or equal to the return value of {@link #getMaxBufferSize}.
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link
     *     android.speech.tts.TextToSpeech#STOPPED}.
     */
    int audioAvailable(byte[] buffer, int offset, int length);

    /**
     * The service should call this method when all the synthesized audio for a request has been
     * passed to {@link #audioAvailable}.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>This method has to be called if {@link #start} and/or {@link #error} was called.
     *
     * @return {@link android.speech.tts.TextToSpeech#SUCCESS}, {@link
     *     android.speech.tts.TextToSpeech#ERROR} or {@link
     *     android.speech.tts.TextToSpeech#STOPPED}.
     */
    int done();

    /**
     * The service should call this method if the speech synthesis fails.
     *
     * <p>This overload carries no error code; use {@link #error(int)} to pass a specific error
     * code to the client.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     */
    void error();

    /**
     * The service should call this method if the speech synthesis fails, reporting the reason
     * as an error code.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * @param errorCode Error code to pass to the client. One of the ERROR_ values from {@link
     *     android.speech.tts.TextToSpeech}
     */
    void error(@TextToSpeech.Error int errorCode);

    /**
     * Check if {@link #start} was called or not.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>Useful for checking if a fallback from network request is possible.
     *
     * @return {@code true} if {@link #start} was called, {@code false} otherwise.
     */
    boolean hasStarted();

    /**
     * Check if {@link #done} was called or not.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>Useful for checking if a fallback from network request is possible.
     *
     * @return {@code true} if {@link #done} was called, {@code false} otherwise.
     */
    boolean hasFinished();

    /**
     * The service may call this method to provide timing information about the spoken text.
     *
     * <p>Calling this method means that at the given audio frame, the given range of the input
     * is about to be spoken. If this method is called the client will receive a callback on the
     * listener ({@link UtteranceProgressListener#onRangeStart}) at the moment that frame has
     * been reached by the playback head.
     *
     * <p>This information can be used by the client, for example, to highlight ranges of the
     * text while it is spoken.
     *
     * <p>{@code markerInFrames} is a frame index into the audio for this synthesis request,
     * i.e. into the concatenation of the audio bytes sent to {@link #audioAvailable} for this
     * synthesis request. The definition of a frame depends on the format given by {@link
     * #start}. See {@link AudioFormat} for more information.
     *
     * <p>This method should only be called on the synthesis thread, while in {@link
     * TextToSpeechService#onSynthesizeText}.
     *
     * <p>The default implementation declared here does nothing.
     *
     * @param markerInFrames The position in frames in the audio where this range is spoken.
     * @param start The start index of the range in the input text.
     * @param end The end index (exclusive) of the range in the input text.
     */
    default void rangeStart(int markerInFrames, int start, int end) {}
}