// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| |
| #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ |
| #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ |
| |
| #include <queue> |
| #include <set> |
| #include <string> |
| #include <vector> |
| |
| #include "base/memory/scoped_ptr.h" |
| #include "base/memory/singleton.h" |
| #include "base/memory/weak_ptr.h" |
| #include "url/gurl.h" |
| |
// Forward declarations. base::Value, Profile and TtsPlatformImpl are only
// referred to through pointers (raw or scoped_ptr) in this header; Utterance
// is named by UtteranceEventDelegate before its full definition appears.
namespace base {
class Value;
}  // namespace base

class Profile;
class TtsPlatformImpl;
class Utterance;
| |
// Progress events that the TTS engine sends back for an utterance.
// The enumerators are numbered consecutively from zero, in declaration
// order; the values are spelled out here so the numbering is explicit.
enum TtsEventType {
  TTS_EVENT_START = 0,
  TTS_EVENT_END = 1,
  TTS_EVENT_WORD = 2,
  TTS_EVENT_SENTENCE = 3,
  TTS_EVENT_MARKER = 4,
  TTS_EVENT_INTERRUPTED = 5,
  TTS_EVENT_CANCELLED = 6,
  TTS_EVENT_ERROR = 7,
  TTS_EVENT_PAUSE = 8,
  TTS_EVENT_RESUME = 9
};
| |
// Gender attribute used both to describe a voice (VoiceData::gender) and to
// request one for an utterance (Utterance::gender()). Values are numbered
// from zero in declaration order.
enum TtsGenderType {
  TTS_GENDER_NONE = 0,
  TTS_GENDER_MALE = 1,
  TTS_GENDER_FEMALE = 2
};
| |
// Returns true if |event_type| is one that indicates an utterance
// is finished and can be destroyed. Which event types count as final is
// decided by the definition, which is not part of this header.
bool IsFinalTtsEventType(TtsEventType event_type);
| |
// The continuous parameters that apply to a given utterance: speaking rate,
// pitch and volume. Stored by value in Utterance (see
// Utterance::set_continuous_parameters).
struct UtteranceContinuousParameters {
  // Defined out of line.
  // NOTE(review): presumably initializes each field to a default value --
  // confirm against the definition.
  UtteranceContinuousParameters();

  // NOTE(review): valid ranges and units for these values are not visible
  // in this header -- confirm against the platform/extension API.
  double rate;
  double pitch;
  double volume;
};
| |
// Information about one voice. TtsController::GetVoices() fills a vector of
// these.
struct VoiceData {
  // Both defined out of line.
  VoiceData();
  ~VoiceData();

  // Name and language of the voice.
  // NOTE(review): the expected format of |lang| is not visible here --
  // confirm.
  std::string name;
  std::string lang;

  // Gender of the voice.
  TtsGenderType gender;

  // NOTE(review): presumably the ID of the extension implementing this voice
  // when |native| is false -- confirm.
  std::string extension_id;

  // Event types associated with this voice.
  // NOTE(review): presumably the events the voice is able to send --
  // confirm.
  std::set<TtsEventType> events;

  // If true, the synthesis engine is a remote network resource.
  // It may be higher latency and may incur bandwidth costs.
  bool remote;

  // If true, this is implemented by this platform's subclass of
  // TtsPlatformImpl. If false, this is implemented by an extension.
  bool native;

  // NOTE(review): presumably identifies the voice to the platform
  // implementation when |native| is true -- confirm.
  std::string native_voice_identifier;
};
| |
// Interface for a class that wants to receive events on utterances.
// An Utterance refers to its delegate through a base::WeakPtr (see
// Utterance::set_event_delegate).
class UtteranceEventDelegate {
 public:
  virtual ~UtteranceEventDelegate() {}

  // Called with an event for |utterance|.
  //   event_type:    the kind of event being reported.
  //   char_index:    character position associated with the event.
  //                  NOTE(review): presumably an index into the utterance
  //                  text -- confirm.
  //   error_message: text accompanying the event.
  //                  NOTE(review): presumably only meaningful for
  //                  TTS_EVENT_ERROR -- confirm.
  virtual void OnTtsEvent(Utterance* utterance,
                          TtsEventType event_type,
                          int char_index,
                          const std::string& error_message) = 0;
};
| |
// Interface for a class that wants to be notified when the set of
// voices has changed. Delegates are registered and unregistered through
// TtsController::AddVoicesChangedDelegate() and
// TtsController::RemoveVoicesChangedDelegate(), which take raw pointers.
class VoicesChangedDelegate {
 public:
  virtual ~VoicesChangedDelegate() {}

  // Notification that the set of voices has changed. Carries no arguments.
  virtual void OnVoicesChanged() = 0;
};
| |
| // One speech utterance. |
| class Utterance { |
| public: |
| // Construct an utterance given a profile and a completion task to call |
| // when the utterance is done speaking. Before speaking this utterance, |
| // its other parameters like text, rate, pitch, etc. should all be set. |
| explicit Utterance(Profile* profile); |
| virtual ~Utterance(); |
| |
| // Sends an event to the delegate. If the event type is TTS_EVENT_END |
| // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1, |
| // uses the last good value. |
| void OnTtsEvent(TtsEventType event_type, |
| int char_index, |
| const std::string& error_message); |
| |
| // Finish an utterance without sending an event to the delegate. |
| void Finish(); |
| |
| // Getters and setters for the text to speak and other speech options. |
| void set_text(const std::string& text) { text_ = text; } |
| const std::string& text() const { return text_; } |
| |
| void set_options(const base::Value* options); |
| const base::Value* options() const { return options_.get(); } |
| |
| void set_src_extension_id(const std::string& src_extension_id) { |
| src_extension_id_ = src_extension_id; |
| } |
| const std::string& src_extension_id() { return src_extension_id_; } |
| |
| void set_src_id(int src_id) { src_id_ = src_id; } |
| int src_id() { return src_id_; } |
| |
| void set_src_url(const GURL& src_url) { src_url_ = src_url; } |
| const GURL& src_url() { return src_url_; } |
| |
| void set_voice_name(const std::string& voice_name) { |
| voice_name_ = voice_name; |
| } |
| const std::string& voice_name() const { return voice_name_; } |
| |
| void set_lang(const std::string& lang) { |
| lang_ = lang; |
| } |
| const std::string& lang() const { return lang_; } |
| |
| void set_gender(TtsGenderType gender) { |
| gender_ = gender; |
| } |
| TtsGenderType gender() const { return gender_; } |
| |
| void set_continuous_parameters(const UtteranceContinuousParameters& params) { |
| continuous_parameters_ = params; |
| } |
| const UtteranceContinuousParameters& continuous_parameters() { |
| return continuous_parameters_; |
| } |
| |
| void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; } |
| bool can_enqueue() const { return can_enqueue_; } |
| |
| void set_required_event_types(const std::set<TtsEventType>& types) { |
| required_event_types_ = types; |
| } |
| const std::set<TtsEventType>& required_event_types() const { |
| return required_event_types_; |
| } |
| |
| void set_desired_event_types(const std::set<TtsEventType>& types) { |
| desired_event_types_ = types; |
| } |
| const std::set<TtsEventType>& desired_event_types() const { |
| return desired_event_types_; |
| } |
| |
| const std::string& extension_id() const { return extension_id_; } |
| void set_extension_id(const std::string& extension_id) { |
| extension_id_ = extension_id; |
| } |
| |
| UtteranceEventDelegate* event_delegate() const { |
| return event_delegate_.get(); |
| } |
| void set_event_delegate( |
| base::WeakPtr<UtteranceEventDelegate> event_delegate) { |
| event_delegate_ = event_delegate; |
| } |
| |
| // Getters and setters for internal state. |
| Profile* profile() const { return profile_; } |
| int id() const { return id_; } |
| bool finished() const { return finished_; } |
| |
| protected: |
| void set_finished_for_testing(bool finished) { finished_ = finished; } |
| |
| private: |
| // The profile that initiated this utterance. |
| Profile* profile_; |
| |
| // The extension ID of the extension providing TTS for this utterance, or |
| // empty if native TTS is being used. |
| std::string extension_id_; |
| |
| // The unique ID of this utterance, used to associate callback functions |
| // with utterances. |
| int id_; |
| |
| // The id of the next utterance, so we can associate requests with |
| // responses. |
| static int next_utterance_id_; |
| |
| // The text to speak. |
| std::string text_; |
| |
| // The full options arg passed to tts.speak, which may include fields |
| // other than the ones we explicitly parse, below. |
| scoped_ptr<base::Value> options_; |
| |
| // The extension ID of the extension that called speak() and should |
| // receive events. |
| std::string src_extension_id_; |
| |
| // The source extension's ID of this utterance, so that it can associate |
| // events with the appropriate callback. |
| int src_id_; |
| |
| // The URL of the page where the source extension called speak. |
| GURL src_url_; |
| |
| // The delegate to be called when an utterance event is fired. |
| base::WeakPtr<UtteranceEventDelegate> event_delegate_; |
| |
| // The parsed options. |
| std::string voice_name_; |
| std::string lang_; |
| TtsGenderType gender_; |
| UtteranceContinuousParameters continuous_parameters_; |
| bool can_enqueue_; |
| std::set<TtsEventType> required_event_types_; |
| std::set<TtsEventType> desired_event_types_; |
| |
| // The index of the current char being spoken. |
| int char_index_; |
| |
| // True if this utterance received an event indicating it's done. |
| bool finished_; |
| }; |
| |
// Singleton class that manages text-to-speech for the TTS and TTS engine
// extension APIs, maintaining a queue of pending utterances and keeping
// track of all state. All member functions are defined out of line.
class TtsController {
 public:
  // Get the single instance of this class.
  static TtsController* GetInstance();

  // Returns true if we're currently speaking an utterance.
  bool IsSpeaking();

  // Speak the given utterance. If the utterance's can_enqueue flag is true
  // and another utterance is in progress, adds it to the end of the queue.
  // Otherwise, interrupts any current utterance and speaks this one
  // immediately.
  void SpeakOrEnqueue(Utterance* utterance);

  // Stop all utterances and flush the queue. Implies leaving pause mode
  // as well.
  void Stop();

  // Pause the speech queue. Some engines may support pausing in the middle
  // of an utterance.
  void Pause();

  // Resume speaking.
  void Resume();

  // Handle events received from the speech engine. Events are forwarded to
  // the callback function, and in addition, completion and error events
  // trigger finishing the current utterance and starting the next one, if
  // any. |utterance_id| identifies the utterance the event refers to (see
  // Utterance::id()).
  void OnTtsEvent(int utterance_id,
                  TtsEventType event_type,
                  int char_index,
                  const std::string& error_message);

  // Return a list of all available voices, including the native voice,
  // if supported, and all voices registered by extensions. The result is
  // written to |out_voices|.
  void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);

  // Called by the extension system or platform implementation when the
  // list of voices may have changed and should be re-queried.
  void VoicesChanged();

  // Add a delegate that wants to be notified when the set of voices changes.
  // The delegate is tracked by raw pointer in |voices_changed_delegates_|.
  void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);

  // Remove delegate that wants to be notified when the set of voices changes.
  void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);

  // For unit testing.
  void SetPlatformImpl(TtsPlatformImpl* platform_impl);
  int QueueSize();

 protected:
  // Construction and destruction are not public; instances are obtained
  // through GetInstance().
  TtsController();
  virtual ~TtsController();

 private:
  // Get the platform TTS implementation (or injected mock).
  TtsPlatformImpl* GetPlatformImpl();

  // Start speaking the given utterance. Will either take ownership of
  // |utterance| or delete it if there's an error. Does not return a status.
  void SpeakNow(Utterance* utterance);

  // Clear the utterance queue. If send_events is true, will send
  // TTS_EVENT_CANCELLED events on each one.
  void ClearUtteranceQueue(bool send_events);

  // Finalize and delete the current utterance.
  void FinishCurrentUtterance();

  // Start speaking the next utterance in the queue.
  void SpeakNextUtterance();

  // Given an utterance and a vector of voices, return the
  // index of the voice that best matches the utterance.
  // NOTE(review): the value returned when no voice matches is not visible
  // in this header -- confirm against the definition.
  int GetMatchingVoice(const Utterance* utterance,
                       std::vector<VoiceData>& voices);

  // Lets the singleton traits reach the protected constructor/destructor.
  friend struct DefaultSingletonTraits<TtsController>;

  // The current utterance being spoken.
  Utterance* current_utterance_;

  // Whether the queue is paused or not.
  bool paused_;

  // A queue of utterances to speak after the current one finishes.
  std::queue<Utterance*> utterance_queue_;

  // A set of delegates that want to be notified when the voices change.
  std::set<VoicesChangedDelegate*> voices_changed_delegates_;

  // A pointer to the platform implementation of text-to-speech, for
  // dependency injection.
  TtsPlatformImpl* platform_impl_;

  DISALLOW_COPY_AND_ASSIGN(TtsController);
};
| |
| #endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ |