/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.example.android.ttsengine;

import android.content.Context;
import android.content.SharedPreferences;
import android.media.AudioFormat;
import android.speech.tts.SynthesisCallback;
import android.speech.tts.SynthesisRequest;
import android.speech.tts.TextToSpeech;
import android.speech.tts.TextToSpeechService;
import android.util.Log;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * A text-to-speech engine that generates "speech" that a robot might understand.
 * The engine supports two different "languages" (American and British robot
 * English), each with its own character-to-frequency mapping.
 *
 * It exercises all aspects of the text-to-speech engine API
 * {@link android.speech.tts.TextToSpeechService}.
 */
| public class RobotSpeakTtsService extends TextToSpeechService { |
| private static final String TAG = "ExampleTtsService"; |
| |
| /* |
| * This is the sampling rate of our output audio. This engine outputs |
| * audio at 16khz 16bits per sample PCM audio. |
| */ |
| private static final int SAMPLING_RATE_HZ = 16000; |
| |
| /* |
| * We multiply by a factor of two since each sample contains 16 bits (2 bytes). |
| */ |
| private final byte[] mAudioBuffer = new byte[SAMPLING_RATE_HZ * 2]; |
| |
| private Map<Character, Integer> mFrequenciesMap; |
| private volatile String[] mCurrentLanguage = null; |
| private volatile boolean mStopRequested = false; |
| private SharedPreferences mSharedPrefs = null; |
| |
| @Override |
| public void onCreate() { |
| super.onCreate(); |
| mSharedPrefs = getSharedPreferences(GeneralSettingsFragment.SHARED_PREFS_NAME, |
| Context.MODE_PRIVATE); |
| // We load the default language when we start up. This isn't strictly |
| // required though, it can always be loaded lazily on the first call to |
| // onLoadLanguage or onSynthesizeText. This a tradeoff between memory usage |
| // and the latency of the first call. |
| onLoadLanguage("eng", "usa", ""); |
| } |
| |
| @Override |
| public void onDestroy() { |
| super.onDestroy(); |
| } |
| |
| @Override |
| protected String[] onGetLanguage() { |
| // Note that mCurrentLanguage is volatile because this can be called from |
| // multiple threads. |
| return mCurrentLanguage; |
| } |
| |
| @Override |
| protected int onIsLanguageAvailable(String lang, String country, String variant) { |
| // The robot speak synthesizer supports only english. |
| if ("eng".equals(lang)) { |
| // We support two specific robot languages, the british robot language |
| // and the american robot language. |
| if ("USA".equals(country) || "GBR".equals(country)) { |
| // If the engine supported a specific variant, we would have |
| // something like. |
| // |
| // if ("android".equals(variant)) { |
| // return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE; |
| // } |
| return TextToSpeech.LANG_COUNTRY_AVAILABLE; |
| } |
| |
| // We support the language, but not the country. |
| return TextToSpeech.LANG_AVAILABLE; |
| } |
| |
| return TextToSpeech.LANG_NOT_SUPPORTED; |
| } |
| |
| /* |
| * Note that this method is synchronized, as is onSynthesizeText because |
| * onLoadLanguage can be called from multiple threads (while onSynthesizeText |
| * is always called from a single thread only). |
| */ |
| @Override |
| protected synchronized int onLoadLanguage(String lang, String country, String variant) { |
| final int isLanguageAvailable = onIsLanguageAvailable(lang, country, variant); |
| |
| if (isLanguageAvailable == TextToSpeech.LANG_NOT_SUPPORTED) { |
| return isLanguageAvailable; |
| } |
| |
| String loadCountry = country; |
| if (isLanguageAvailable == TextToSpeech.LANG_AVAILABLE) { |
| loadCountry = "USA"; |
| } |
| |
| // If we've already loaded the requested language, we can return early. |
| if (mCurrentLanguage != null) { |
| if (mCurrentLanguage[0].equals(lang) && mCurrentLanguage[1].equals(country)) { |
| return isLanguageAvailable; |
| } |
| } |
| |
| Map<Character, Integer> newFrequenciesMap = null; |
| try { |
| InputStream file = getAssets().open(lang + "-" + loadCountry + ".freq"); |
| newFrequenciesMap = buildFrequencyMap(file); |
| file.close(); |
| } catch (IOException e) { |
| Log.e(TAG, "Error loading data for : " + lang + "-" + country); |
| } |
| |
| mFrequenciesMap = newFrequenciesMap; |
| mCurrentLanguage = new String[] { lang, loadCountry, ""}; |
| |
| return isLanguageAvailable; |
| } |
| |
| @Override |
| protected void onStop() { |
| mStopRequested = true; |
| } |
| |
| @Override |
| protected synchronized void onSynthesizeText(SynthesisRequest request, |
| SynthesisCallback callback) { |
| // Note that we call onLoadLanguage here since there is no guarantee |
| // that there would have been a prior call to this function. |
| int load = onLoadLanguage(request.getLanguage(), request.getCountry(), |
| request.getVariant()); |
| |
| // We might get requests for a language we don't support - in which case |
| // we error out early before wasting too much time. |
| if (load == TextToSpeech.LANG_NOT_SUPPORTED) { |
| callback.error(); |
| return; |
| } |
| |
| // At this point, we have loaded the language we need for synthesis and |
| // it is guaranteed that we support it so we proceed with synthesis. |
| |
| // We denote that we are ready to start sending audio across to the |
| // framework. We use a fixed sampling rate (16khz), and send data across |
| // in 16bit PCM mono. |
| callback.start(SAMPLING_RATE_HZ, |
| AudioFormat.ENCODING_PCM_16BIT, 1 /* Number of channels. */); |
| |
| // We then scan through each character of the request string and |
| // generate audio for it. |
| final String text = request.getText().toLowerCase(); |
| for (int i = 0; i < text.length(); ++i) { |
| char value = normalize(text.charAt(i)); |
| // It is crucial to call either of callback.error() or callback.done() to ensure |
| // that audio / other resources are released as soon as possible. |
| if (!generateOneSecondOfAudio(value, callback)) { |
| callback.error(); |
| return; |
| } |
| } |
| |
| // Alright, we're done with our synthesis - yay! |
| callback.done(); |
| } |
| |
| /* |
| * Normalizes a given character to the range 'a' - 'z' (inclusive). Our |
| * frequency mappings contain frequencies for each of these characters. |
| */ |
| private static char normalize(char input) { |
| if (input == ' ') { |
| return input; |
| } |
| |
| if (input < 'a') { |
| return 'a'; |
| } |
| if (input > 'z') { |
| return 'z'; |
| } |
| |
| return input; |
| } |
| |
| private Map<Character, Integer> buildFrequencyMap(InputStream is) throws IOException { |
| BufferedReader br = new BufferedReader(new InputStreamReader(is)); |
| String line = null; |
| Map<Character, Integer> map = new HashMap<Character, Integer>(); |
| try { |
| while ((line = br.readLine()) != null) { |
| String[] parts = line.split(":"); |
| if (parts.length != 2) { |
| throw new IOException("Invalid line encountered: " + line); |
| } |
| map.put(parts[0].charAt(0), Integer.parseInt(parts[1])); |
| } |
| map.put(' ', 0); |
| return map; |
| } finally { |
| is.close(); |
| } |
| } |
| |
| private boolean generateOneSecondOfAudio(char alphabet, SynthesisCallback cb) { |
| ByteBuffer buffer = ByteBuffer.wrap(mAudioBuffer).order(ByteOrder.LITTLE_ENDIAN); |
| |
| // Someone called onStop, end the current synthesis and return. |
| // The mStopRequested variable will be reset at the beginning of the |
| // next synthesis. |
| // |
| // In general, a call to onStop( ) should make a best effort attempt |
| // to stop all processing for the *current* onSynthesizeText request (if |
| // one is active). |
| if (mStopRequested) { |
| return false; |
| } |
| |
| |
| if (mFrequenciesMap == null || !mFrequenciesMap.containsKey(alphabet)) { |
| return false; |
| } |
| |
| final int frequency = mFrequenciesMap.get(alphabet); |
| |
| if (frequency > 0) { |
| // This is the wavelength in samples. The frequency is chosen so that the |
| // waveLength is always a multiple of two and frequency divides the |
| // SAMPLING_RATE exactly. |
| final int waveLength = SAMPLING_RATE_HZ / frequency; |
| final int times = SAMPLING_RATE_HZ / waveLength; |
| |
| for (int j = 0; j < times; ++j) { |
| // For a square curve, half of the values will be at Short.MIN_VALUE |
| // and the other half will be Short.MAX_VALUE. |
| for (int i = 0; i < waveLength / 2; ++i) { |
| buffer.putShort((short)(getAmplitude() * -1)); |
| } |
| for (int i = 0; i < waveLength / 2; ++i) { |
| buffer.putShort(getAmplitude()); |
| } |
| } |
| } else { |
| // Play a second of silence. |
| for (int i = 0; i < mAudioBuffer.length / 2; ++i) { |
| buffer.putShort((short) 0); |
| } |
| } |
| |
| // Get the maximum allowed size of data we can send across in audioAvailable. |
| final int maxBufferSize = cb.getMaxBufferSize(); |
| int offset = 0; |
| while (offset < mAudioBuffer.length) { |
| int bytesToWrite = Math.min(maxBufferSize, mAudioBuffer.length - offset); |
| cb.audioAvailable(mAudioBuffer, offset, bytesToWrite); |
| offset += bytesToWrite; |
| } |
| return true; |
| } |
| |
| private short getAmplitude() { |
| boolean whisper = mSharedPrefs.getBoolean(GeneralSettingsFragment.WHISPER_KEY, false); |
| return (short) (whisper ? 2048 : 8192); |
| } |
| } |