/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.example.android.ttsengine;

import android.content.Context;
import android.content.SharedPreferences;
import android.media.AudioFormat;
import android.speech.tts.SynthesisCallback;
import android.speech.tts.SynthesisRequest;
import android.speech.tts.TextToSpeech;
import android.speech.tts.TextToSpeechService;
import android.util.Log;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * A text to speech engine that generates "speech" that a robot might understand.
 * The engine supports two different "languages", each with its own frequency
 * mapping.
 *
 * It exercises all aspects of the text to speech engine API
 * {@link android.speech.tts.TextToSpeechService}.
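 *
 * For illustration, a client application might select this engine explicitly
 * by package name. This is a minimal sketch, assuming {@code mTts} is a field
 * of the calling class and {@code context} is a valid Context:
 *
 * <pre>{@code
 * mTts = new TextToSpeech(context, status -> {
 *     if (status == TextToSpeech.SUCCESS) {
 *         mTts.setLanguage(Locale.US);
 *         mTts.speak("hello robot", TextToSpeech.QUEUE_FLUSH, null);
 *     }
 * }, "com.example.android.ttsengine");
 * }</pre>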
 */
public class RobotSpeakTtsService extends TextToSpeechService {
    private static final String TAG = "ExampleTtsService";

    /*
     * This is the sampling rate of our output audio. This engine outputs
     * audio at 16 kHz, with 16 bits per sample, PCM mono.
     */
    private static final int SAMPLING_RATE_HZ = 16000;

    /*
     * One second of audio: SAMPLING_RATE_HZ samples, each 16 bits (2 bytes) wide.
     */
    private final byte[] mAudioBuffer = new byte[SAMPLING_RATE_HZ * 2];

    private Map<Character, Integer> mFrequenciesMap;
    private volatile String[] mCurrentLanguage = null;
    private volatile boolean mStopRequested = false;
    private SharedPreferences mSharedPrefs = null;

    @Override
    public void onCreate() {
        super.onCreate();
        mSharedPrefs = getSharedPreferences(GeneralSettingsFragment.SHARED_PREFS_NAME,
                Context.MODE_PRIVATE);
        // We load the default language when we start up. This isn't strictly
        // required, though: the language can always be loaded lazily on the
        // first call to onLoadLanguage or onSynthesizeText. This is a tradeoff
        // between memory usage and the latency of the first call.
        onLoadLanguage("eng", "usa", "");
    }

    @Override
    public void onDestroy() {
        super.onDestroy();
    }

    @Override
    protected String[] onGetLanguage() {
        // Note that mCurrentLanguage is volatile because this can be called from
        // multiple threads.
        return mCurrentLanguage;
    }

    @Override
    protected int onIsLanguageAvailable(String lang, String country, String variant) {
        // The robot speak synthesizer supports only English.
        if ("eng".equals(lang)) {
            // We support two specific robot languages, the British robot language
            // and the American robot language.
            if ("USA".equals(country) || "GBR".equals(country)) {
                // If the engine supported a specific variant, we would have
                // something like:
                //
                // if ("android".equals(variant)) {
                //     return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE;
                // }
                return TextToSpeech.LANG_COUNTRY_AVAILABLE;
            }

            // We support the language, but not the country.
            return TextToSpeech.LANG_AVAILABLE;
        }

        return TextToSpeech.LANG_NOT_SUPPORTED;
    }

    /*
     * Note that this method is synchronized, as is onSynthesizeText, because
     * onLoadLanguage can be called from multiple threads (while onSynthesizeText
     * is always called from a single thread only).
     */
    @Override
    protected synchronized int onLoadLanguage(String lang, String country, String variant) {
        final int isLanguageAvailable = onIsLanguageAvailable(lang, country, variant);

        if (isLanguageAvailable == TextToSpeech.LANG_NOT_SUPPORTED) {
            return isLanguageAvailable;
        }

        String loadCountry = country;
        if (isLanguageAvailable == TextToSpeech.LANG_AVAILABLE) {
            loadCountry = "USA";
        }

        // If we've already loaded the requested language, we can return early.
        // Note that we compare against loadCountry, since that is what gets
        // stored in mCurrentLanguage below.
        if (mCurrentLanguage != null) {
            if (mCurrentLanguage[0].equals(lang) && mCurrentLanguage[1].equals(loadCountry)) {
                return isLanguageAvailable;
            }
        }

        Map<Character, Integer> newFrequenciesMap = null;
        try {
            // buildFrequencyMap closes the stream when it is done with it.
            InputStream file = getAssets().open(lang + "-" + loadCountry + ".freq");
            newFrequenciesMap = buildFrequencyMap(file);
        } catch (IOException e) {
            Log.e(TAG, "Error loading data for: " + lang + "-" + loadCountry, e);
        }
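        // Note: if loading failed above, newFrequenciesMap is still null. We
        // record the language anyway; a later synthesis call will then report
        // an error, because generateOneSecondOfAudio fails when
        // mFrequenciesMap is null.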
        mFrequenciesMap = newFrequenciesMap;
        mCurrentLanguage = new String[] { lang, loadCountry, "" };

        return isLanguageAvailable;
    }

    @Override
    protected void onStop() {
        mStopRequested = true;
    }

    @Override
    protected synchronized void onSynthesizeText(SynthesisRequest request,
            SynthesisCallback callback) {
        // A new request is starting, so clear any stop request left over from
        // a previous synthesis.
        mStopRequested = false;

        // Note that we call onLoadLanguage here since there is no guarantee
        // that there has been a prior call to it for this language.
        int load = onLoadLanguage(request.getLanguage(), request.getCountry(),
                request.getVariant());

        // We might get requests for a language we don't support - in which case
        // we error out early before wasting too much time.
        if (load == TextToSpeech.LANG_NOT_SUPPORTED) {
            callback.error();
            return;
        }

        // At this point, we have loaded the language we need for synthesis and
        // it is guaranteed that we support it, so we proceed with synthesis.

        // We denote that we are ready to start sending audio across to the
        // framework. We use a fixed sampling rate (16 kHz), and send data across
        // in 16-bit PCM mono.
        callback.start(SAMPLING_RATE_HZ,
                AudioFormat.ENCODING_PCM_16BIT, 1 /* Number of channels. */);

        // We then scan through each character of the request string and
        // generate audio for it.
        final String text = request.getText().toLowerCase(Locale.US);
        for (int i = 0; i < text.length(); ++i) {
            char value = normalize(text.charAt(i));
            // It is crucial to call either callback.error() or callback.done()
            // to ensure that audio and other resources are released as soon as
            // possible.
            if (!generateOneSecondOfAudio(value, callback)) {
                callback.error();
                return;
            }
        }

        // Alright, we're done with our synthesis - yay!
        callback.done();
    }

    /*
     * Normalizes a given character to the range 'a' - 'z' (inclusive), leaving
     * the space character untouched. Our frequency mappings contain frequencies
     * for each of these characters.
     */
    private static char normalize(char input) {
        if (input == ' ') {
            return input;
        }

        if (input < 'a') {
            return 'a';
        }
        if (input > 'z') {
            return 'z';
        }

        return input;
    }

    private Map<Character, Integer> buildFrequencyMap(InputStream is) throws IOException {
        BufferedReader br = new BufferedReader(new InputStreamReader(is));
        String line = null;
        Map<Character, Integer> map = new HashMap<Character, Integer>();
        try {
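            // Each line of the asset is expected to hold a single character and
            // its frequency, separated by a colon - for example "a:220". (The
            // actual values ship with the sample's .freq assets; "a:220" here
            // is just an illustration.)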
            while ((line = br.readLine()) != null) {
                String[] parts = line.split(":");
                if (parts.length != 2) {
                    throw new IOException("Invalid line encountered: " + line);
                }
                map.put(parts[0].charAt(0), Integer.parseInt(parts[1]));
            }
            map.put(' ', 0);
            return map;
        } finally {
            // Closing the reader also closes the underlying input stream.
            br.close();
        }
    }

    private boolean generateOneSecondOfAudio(char alphabet, SynthesisCallback cb) {
        ByteBuffer buffer = ByteBuffer.wrap(mAudioBuffer).order(ByteOrder.LITTLE_ENDIAN);

        // Someone called onStop, so end the current synthesis and return.
        // The mStopRequested variable will be reset at the beginning of the
        // next synthesis.
        //
        // In general, a call to onStop() should make a best effort attempt
        // to stop all processing for the *current* onSynthesizeText request (if
        // one is active).
        if (mStopRequested) {
            return false;
        }

        if (mFrequenciesMap == null || !mFrequenciesMap.containsKey(alphabet)) {
            return false;
        }

        final int frequency = mFrequenciesMap.get(alphabet);

        if (frequency > 0) {
            // This is the wavelength in samples. The frequency is chosen so that
            // the waveLength is always a multiple of two and frequency divides
            // SAMPLING_RATE_HZ exactly.
            final int waveLength = SAMPLING_RATE_HZ / frequency;
            final int times = SAMPLING_RATE_HZ / waveLength;

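            // Because frequency divides SAMPLING_RATE_HZ exactly, times *
            // waveLength equals SAMPLING_RATE_HZ, so the two loops below fill
            // mAudioBuffer with exactly one second of square wave.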
            for (int j = 0; j < times; ++j) {
                // For a square wave, half of the values sit at -amplitude and
                // the other half at +amplitude.
                for (int i = 0; i < waveLength / 2; ++i) {
                    buffer.putShort((short) -getAmplitude());
                }
                for (int i = 0; i < waveLength / 2; ++i) {
                    buffer.putShort(getAmplitude());
                }
            }
        } else {
            // Play a second of silence.
            for (int i = 0; i < mAudioBuffer.length / 2; ++i) {
                buffer.putShort((short) 0);
            }
        }

        // Get the maximum allowed size of data we can send across in audioAvailable.
        final int maxBufferSize = cb.getMaxBufferSize();
        int offset = 0;
        while (offset < mAudioBuffer.length) {
            int bytesToWrite = Math.min(maxBufferSize, mAudioBuffer.length - offset);
            cb.audioAvailable(mAudioBuffer, offset, bytesToWrite);
            offset += bytesToWrite;
        }
        return true;
    }

    private short getAmplitude() {
        // Use a lower amplitude when the user has enabled "whisper" mode in
        // the settings UI.
        boolean whisper = mSharedPrefs.getBoolean(GeneralSettingsFragment.WHISPER_KEY, false);
        return (short) (whisper ? 2048 : 8192);
    }
}