// chrome/browser/speech/tts_controller.h - platform/external/chromium_org - Git at Google

 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_

 #include <queue>
 #include <set>
 #include <string>
 #include <vector>

 #include "base/memory/scoped_ptr.h"
 #include "base/memory/singleton.h"
 #include "base/memory/weak_ptr.h"
 #include "url/gurl.h"

 class Utterance;
 class TtsPlatformImpl;
 class Profile;

 namespace base {
 class Value;
 }

 // Progress events that the TTS engine sends back. The explicit values
 // below are the same ones the enumerators get from their declaration order.
 enum TtsEventType {
   TTS_EVENT_START = 0,
   TTS_EVENT_END = 1,
   TTS_EVENT_WORD = 2,
   TTS_EVENT_SENTENCE = 3,
   TTS_EVENT_MARKER = 4,
   TTS_EVENT_INTERRUPTED = 5,
   TTS_EVENT_CANCELLED = 6,
   TTS_EVENT_ERROR = 7,
   TTS_EVENT_PAUSE = 8,
   TTS_EVENT_RESUME = 9
 };

 // Gender tag used by VoiceData::gender and by Utterance's gender option.
 // The explicit values match the implicit declaration-order numbering.
 enum TtsGenderType {
   TTS_GENDER_NONE = 0,
   TTS_GENDER_MALE = 1,
   TTS_GENDER_FEMALE = 2
 };

 // Returns true if |event_type| is one that indicates an utterance
 // is finished and can be destroyed. Only the declaration lives here; the
 // set of event types treated as final is decided by the definition.
 bool IsFinalTtsEventType(TtsEventType event_type);

 // The continuous parameters that apply to a given utterance.
 struct UtteranceContinuousParameters {
   // Declared only; the definition, and therefore the initial values of the
   // fields below, is not part of this header.
   UtteranceContinuousParameters();

   // NOTE(review): units and valid ranges are not stated in this header;
   // presumably these mirror the rate/pitch/volume speech options -- confirm.
   double rate;
   double pitch;
   double volume;
 };

 // Information about one voice.
 struct VoiceData {
   // Constructor and destructor are declared only; their definitions are not
   // part of this header.
   VoiceData();
   ~VoiceData();

   // Name of the voice.
   std::string name;
   // Language of the voice. NOTE(review): the expected format (for example a
   // language tag) is not stated in this header -- confirm.
   std::string lang;
   // Gender of the voice.
   TtsGenderType gender;
   // NOTE(review): presumably the ID of the extension that implements this
   // voice when |native| is false -- confirm.
   std::string extension_id;
   // Event types associated with this voice. NOTE(review): presumably the
   // events the voice is able to send -- confirm.
   std::set<TtsEventType> events;

   // If true, the synthesis engine is a remote network resource.
   // It may be higher latency and may incur bandwidth costs.
   bool remote;

   // If true, this is implemented by this platform's subclass of
   // TtsPlatformImpl. If false, this is implemented by an extension.
   bool native;
   // NOTE(review): presumably a platform-specific identifier for a native
   // voice -- confirm against the TtsPlatformImpl subclasses.
   std::string native_voice_identifier;
 };

 // Class that wants to receive events on utterances. Utterance holds its
 // delegate through a base::WeakPtr<UtteranceEventDelegate>.
 class UtteranceEventDelegate {
  public:
   virtual ~UtteranceEventDelegate() {}
   // Receives one event of type |event_type| for |utterance|.
   // NOTE(review): |char_index| and |error_message| presumably carry the
   // character position and the error text that belong to the event --
   // confirm against the code that invokes the delegate.
   virtual void OnTtsEvent(Utterance* utterance,
                           TtsEventType event_type,
                           int char_index,
                           const std::string& error_message) = 0;
 };

 // Class that wants to be notified when the set of
 // voices has changed. Instances are registered with and unregistered from
 // TtsController via AddVoicesChangedDelegate/RemoveVoicesChangedDelegate.
 class VoicesChangedDelegate {
  public:
   virtual ~VoicesChangedDelegate() {}
   // Notification hook; takes no arguments and returns nothing, so an
   // implementer has to re-query the voices itself if it needs them.
   virtual void OnVoicesChanged() = 0;
 };

 // One speech utterance: the text to speak plus the options and bookkeeping
 // that travel with it.
 class Utterance {
  public:
   // Construct an utterance for the given profile. Before speaking this
   // utterance, its other parameters like text, rate, pitch, etc. should all
   // be set through the setters below.
   explicit Utterance(Profile* profile);
   ~Utterance();

   // Sends an event to the delegate. If the event type is TTS_EVENT_END
   // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
   // uses the last good value.
   void OnTtsEvent(TtsEventType event_type,
                   int char_index,
                   const std::string& error_message);

   // Finish an utterance without sending an event to the delegate.
   void Finish();

   // Getters and setters for the text to speak and other speech options.
   // Every getter is const so that it can be called through a const
   // Utterance pointer or reference.
   void set_text(const std::string& text) { text_ = text; }
   const std::string& text() const { return text_; }

   // set_options() is declared only; how |options| is stored is decided by
   // its definition, which is not part of this header. options() hands back
   // the pointer held by |options_| without giving up ownership.
   void set_options(const base::Value* options);
   const base::Value* options() const { return options_.get(); }

   void set_src_extension_id(const std::string& src_extension_id) {
     src_extension_id_ = src_extension_id;
   }
   const std::string& src_extension_id() const { return src_extension_id_; }

   void set_src_id(int src_id) { src_id_ = src_id; }
   int src_id() const { return src_id_; }

   void set_src_url(const GURL& src_url) { src_url_ = src_url; }
   const GURL& src_url() const { return src_url_; }

   void set_voice_name(const std::string& voice_name) {
     voice_name_ = voice_name;
   }
   const std::string& voice_name() const { return voice_name_; }

   void set_lang(const std::string& lang) {
     lang_ = lang;
   }
   const std::string& lang() const { return lang_; }

   void set_gender(TtsGenderType gender) {
     gender_ = gender;
   }
   TtsGenderType gender() const { return gender_; }

   void set_continuous_parameters(const UtteranceContinuousParameters& params) {
     continuous_parameters_ = params;
   }
   const UtteranceContinuousParameters& continuous_parameters() const {
     return continuous_parameters_;
   }

   void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
   bool can_enqueue() const { return can_enqueue_; }

   void set_required_event_types(const std::set<TtsEventType>& types) {
     required_event_types_ = types;
   }
   const std::set<TtsEventType>& required_event_types() const {
     return required_event_types_;
   }

   void set_desired_event_types(const std::set<TtsEventType>& types) {
     desired_event_types_ = types;
   }
   const std::set<TtsEventType>& desired_event_types() const {
     return desired_event_types_;
   }

   const std::string& extension_id() const { return extension_id_; }
   void set_extension_id(const std::string& extension_id) {
     extension_id_ = extension_id;
   }

   // Returns the raw pointer currently held by the weak pointer
   // |event_delegate_|; the utterance does not own the delegate.
   UtteranceEventDelegate* event_delegate() const {
     return event_delegate_.get();
   }
   void set_event_delegate(
       base::WeakPtr<UtteranceEventDelegate> event_delegate) {
     event_delegate_ = event_delegate;
   }

   // Getters for internal state (there are no public setters for these).
   Profile* profile() const { return profile_; }
   int id() const { return id_; }
   bool finished() const { return finished_; }

  private:
   // The profile that initiated this utterance.
   Profile* profile_;

   // The extension ID of the extension providing TTS for this utterance, or
   // empty if native TTS is being used.
   std::string extension_id_;

   // The unique ID of this utterance, used to associate callback functions
   // with utterances.
   int id_;

   // The id of the next utterance, so we can associate requests with
   // responses.
   static int next_utterance_id_;

   // The text to speak.
   std::string text_;

   // The full options arg passed to tts.speak, which may include fields
   // other than the ones we explicitly parse, below.
   scoped_ptr<base::Value> options_;

   // The extension ID of the extension that called speak() and should
   // receive events.
   std::string src_extension_id_;

   // The source extension's ID of this utterance, so that it can associate
   // events with the appropriate callback.
   int src_id_;

   // The URL of the page where the source extension called speak.
   GURL src_url_;

   // The delegate to be called when an utterance event is fired.
   base::WeakPtr<UtteranceEventDelegate> event_delegate_;

   // The parsed options.
   std::string voice_name_;
   std::string lang_;
   TtsGenderType gender_;
   UtteranceContinuousParameters continuous_parameters_;
   bool can_enqueue_;
   std::set<TtsEventType> required_event_types_;
   std::set<TtsEventType> desired_event_types_;

   // The index of the current char being spoken.
   int char_index_;

   // True if this utterance received an event indicating it's done.
   bool finished_;
 };

 // Singleton class that manages text-to-speech for the TTS and TTS engine
 // extension APIs, maintaining a queue of pending utterances and keeping
 // track of all state. Construction and destruction are protected; callers
 // obtain the object through GetInstance().
 class TtsController {
  public:
   // Get the single instance of this class.
   static TtsController* GetInstance();

   // Returns true if we're currently speaking an utterance.
   bool IsSpeaking();

   // Speak the given utterance. If the utterance's can_enqueue flag is true
   // and another utterance is in progress, adds it to the end of the queue.
   // Otherwise, interrupts any current utterance and speaks this one
   // immediately.
   void SpeakOrEnqueue(Utterance* utterance);

   // Stop all utterances and flush the queue. Implies leaving pause mode
   // as well.
   void Stop();

   // Pause the speech queue. Some engines may support pausing in the middle
   // of an utterance.
   void Pause();

   // Resume speaking.
   void Resume();

   // Handle events received from the speech engine. Events are forwarded to
   // the callback function, and in addition, completion and error events
   // trigger finishing the current utterance and starting the next one, if
   // any. |utterance_id| names the utterance the event belongs to (see
   // Utterance::id()).
   void OnTtsEvent(int utterance_id,
                   TtsEventType event_type,
                   int char_index,
                   const std::string& error_message);

   // Return a list of all available voices, including the native voice,
   // if supported, and all voices registered by extensions. The result is
   // delivered through the |out_voices| out-parameter.
   void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);

   // Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it
   // finishes loading the built-in TTS component extension.
   void RetrySpeakingQueuedUtterances();

   // Called by the extension system or platform implementation when the
   // list of voices may have changed and should be re-queried.
   void VoicesChanged();

   // Add a delegate that wants to be notified when the set of voices changes.
   // NOTE(review): delegates are kept as raw pointers in a std::set, so
   // presumably a delegate must be removed before it is destroyed -- confirm.
   void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);

   // Remove delegate that wants to be notified when the set of voices changes.
   void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);

   // For unit testing: inject a platform implementation and inspect the
   // queue length.
   void SetPlatformImpl(TtsPlatformImpl* platform_impl);
   int QueueSize();

  protected:
   TtsController();
   virtual ~TtsController();

  private:
   // Get the platform TTS implementation (or injected mock).
   TtsPlatformImpl* GetPlatformImpl();

   // Start speaking the given utterance. Will either take ownership of
   // |utterance| or delete it if there's an error. The function returns
   // void, so success or failure is not reported through a return value.
   void SpeakNow(Utterance* utterance);

   // Clear the utterance queue. If send_events is true, will send
   // TTS_EVENT_CANCELLED events on each one.
   void ClearUtteranceQueue(bool send_events);

   // Finalize and delete the current utterance.
   void FinishCurrentUtterance();

   // Start speaking the next utterance in the queue.
   void SpeakNextUtterance();

   // Given an utterance and a vector of voices, return the
   // index of the voice that best matches the utterance.
   // NOTE(review): |voices| is taken by non-const reference; whether it is
   // modified, and what is returned when nothing matches, is not visible in
   // this header -- confirm in the definition.
   int GetMatchingVoice(const Utterance* utterance,
                        std::vector<VoiceData>& voices);

   // Lets the singleton traits reach the protected constructor/destructor.
   friend struct DefaultSingletonTraits<TtsController>;

   // The current utterance being spoken.
   Utterance* current_utterance_;

   // Whether the queue is paused or not.
   bool paused_;

   // A queue of utterances to speak after the current one finishes.
   std::queue<Utterance*> utterance_queue_;

   // A set of delegates that want to be notified when the voices change.
   std::set<VoicesChangedDelegate*> voices_changed_delegates_;

   // A pointer to the platform implementation of text-to-speech, for
   // dependency injection.
   TtsPlatformImpl* platform_impl_;

   DISALLOW_COPY_AND_ASSIGN(TtsController);
 };

 #endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
	// Copyright (c) 2012 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
	#define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_

	#include <queue>
	#include <set>
	#include <string>
	#include <vector>

	#include "base/memory/scoped_ptr.h"
	#include "base/memory/singleton.h"
	#include "base/memory/weak_ptr.h"
	#include "url/gurl.h"

	class Utterance;
	class TtsPlatformImpl;
	class Profile;

	namespace base {
	class Value;
	}

	// Progress events that the TTS engine sends back. The explicit values
	// below are the same ones the enumerators get from their declaration order.
	enum TtsEventType {
	  TTS_EVENT_START = 0,
	  TTS_EVENT_END = 1,
	  TTS_EVENT_WORD = 2,
	  TTS_EVENT_SENTENCE = 3,
	  TTS_EVENT_MARKER = 4,
	  TTS_EVENT_INTERRUPTED = 5,
	  TTS_EVENT_CANCELLED = 6,
	  TTS_EVENT_ERROR = 7,
	  TTS_EVENT_PAUSE = 8,
	  TTS_EVENT_RESUME = 9
	};

	// Gender tag used by VoiceData::gender and by Utterance's gender option.
	// The explicit values match the implicit declaration-order numbering.
	enum TtsGenderType {
	  TTS_GENDER_NONE = 0,
	  TTS_GENDER_MALE = 1,
	  TTS_GENDER_FEMALE = 2
	};

	// Returns true if |event_type| is one that indicates an utterance
	// is finished and can be destroyed. Only the declaration lives here; the
	// set of event types treated as final is decided by the definition.
	bool IsFinalTtsEventType(TtsEventType event_type);

	// The continuous parameters that apply to a given utterance.
	struct UtteranceContinuousParameters {
	// Declared only; the definition, and therefore the initial values of the
	// fields below, is not part of this header.
	UtteranceContinuousParameters();

	// NOTE(review): units and valid ranges are not stated in this header;
	// presumably these mirror the rate/pitch/volume speech options -- confirm.
	double rate;
	double pitch;
	double volume;
	};

	// Information about one voice.
	struct VoiceData {
	// Constructor and destructor are declared only; their definitions are not
	// part of this header.
	VoiceData();
	~VoiceData();

	// Name of the voice.
	std::string name;
	// Language of the voice. NOTE(review): the expected format (for example a
	// language tag) is not stated in this header -- confirm.
	std::string lang;
	// Gender of the voice.
	TtsGenderType gender;
	// NOTE(review): presumably the ID of the extension that implements this
	// voice when |native| is false -- confirm.
	std::string extension_id;
	// Event types associated with this voice. NOTE(review): presumably the
	// events the voice is able to send -- confirm.
	std::set<TtsEventType> events;

	// If true, the synthesis engine is a remote network resource.
	// It may be higher latency and may incur bandwidth costs.
	bool remote;

	// If true, this is implemented by this platform's subclass of
	// TtsPlatformImpl. If false, this is implemented by an extension.
	bool native;
	// NOTE(review): presumably a platform-specific identifier for a native
	// voice -- confirm against the TtsPlatformImpl subclasses.
	std::string native_voice_identifier;
	};

	// Class that wants to receive events on utterances. Utterance holds its
	// delegate through a base::WeakPtr<UtteranceEventDelegate>.
	class UtteranceEventDelegate {
	public:
	virtual ~UtteranceEventDelegate() {}
	// Receives one event of type |event_type| for |utterance|.
	// NOTE(review): |char_index| and |error_message| presumably carry the
	// character position and the error text that belong to the event --
	// confirm against the code that invokes the delegate.
	virtual void OnTtsEvent(Utterance* utterance,
	TtsEventType event_type,
	int char_index,
	const std::string& error_message) = 0;
	};

	// Class that wants to be notified when the set of
	// voices has changed. Instances are registered with and unregistered from
	// TtsController via AddVoicesChangedDelegate/RemoveVoicesChangedDelegate.
	class VoicesChangedDelegate {
	public:
	virtual ~VoicesChangedDelegate() {}
	// Notification hook; takes no arguments and returns nothing, so an
	// implementer has to re-query the voices itself if it needs them.
	virtual void OnVoicesChanged() = 0;
	};

	// One speech utterance: the text to speak plus the options and bookkeeping
	// that travel with it.
	class Utterance {
	 public:
	  // Construct an utterance for the given profile. Before speaking this
	  // utterance, its other parameters like text, rate, pitch, etc. should all
	  // be set through the setters below.
	  explicit Utterance(Profile* profile);
	  ~Utterance();

	  // Sends an event to the delegate. If the event type is TTS_EVENT_END
	  // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
	  // uses the last good value.
	  void OnTtsEvent(TtsEventType event_type,
	                  int char_index,
	                  const std::string& error_message);

	  // Finish an utterance without sending an event to the delegate.
	  void Finish();

	  // Getters and setters for the text to speak and other speech options.
	  // Every getter is const so that it can be called through a const
	  // Utterance pointer or reference.
	  void set_text(const std::string& text) { text_ = text; }
	  const std::string& text() const { return text_; }

	  // set_options() is declared only; how |options| is stored is decided by
	  // its definition, which is not part of this header. options() hands back
	  // the pointer held by |options_| without giving up ownership.
	  void set_options(const base::Value* options);
	  const base::Value* options() const { return options_.get(); }

	  void set_src_extension_id(const std::string& src_extension_id) {
	    src_extension_id_ = src_extension_id;
	  }
	  const std::string& src_extension_id() const { return src_extension_id_; }

	  void set_src_id(int src_id) { src_id_ = src_id; }
	  int src_id() const { return src_id_; }

	  void set_src_url(const GURL& src_url) { src_url_ = src_url; }
	  const GURL& src_url() const { return src_url_; }

	  void set_voice_name(const std::string& voice_name) {
	    voice_name_ = voice_name;
	  }
	  const std::string& voice_name() const { return voice_name_; }

	  void set_lang(const std::string& lang) {
	    lang_ = lang;
	  }
	  const std::string& lang() const { return lang_; }

	  void set_gender(TtsGenderType gender) {
	    gender_ = gender;
	  }
	  TtsGenderType gender() const { return gender_; }

	  void set_continuous_parameters(const UtteranceContinuousParameters& params) {
	    continuous_parameters_ = params;
	  }
	  const UtteranceContinuousParameters& continuous_parameters() const {
	    return continuous_parameters_;
	  }

	  void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
	  bool can_enqueue() const { return can_enqueue_; }

	  void set_required_event_types(const std::set<TtsEventType>& types) {
	    required_event_types_ = types;
	  }
	  const std::set<TtsEventType>& required_event_types() const {
	    return required_event_types_;
	  }

	  void set_desired_event_types(const std::set<TtsEventType>& types) {
	    desired_event_types_ = types;
	  }
	  const std::set<TtsEventType>& desired_event_types() const {
	    return desired_event_types_;
	  }

	  const std::string& extension_id() const { return extension_id_; }
	  void set_extension_id(const std::string& extension_id) {
	    extension_id_ = extension_id;
	  }

	  // Returns the raw pointer currently held by the weak pointer
	  // |event_delegate_|; the utterance does not own the delegate.
	  UtteranceEventDelegate* event_delegate() const {
	    return event_delegate_.get();
	  }
	  void set_event_delegate(
	      base::WeakPtr<UtteranceEventDelegate> event_delegate) {
	    event_delegate_ = event_delegate;
	  }

	  // Getters for internal state (there are no public setters for these).
	  Profile* profile() const { return profile_; }
	  int id() const { return id_; }
	  bool finished() const { return finished_; }

	 private:
	  // The profile that initiated this utterance.
	  Profile* profile_;

	  // The extension ID of the extension providing TTS for this utterance, or
	  // empty if native TTS is being used.
	  std::string extension_id_;

	  // The unique ID of this utterance, used to associate callback functions
	  // with utterances.
	  int id_;

	  // The id of the next utterance, so we can associate requests with
	  // responses.
	  static int next_utterance_id_;

	  // The text to speak.
	  std::string text_;

	  // The full options arg passed to tts.speak, which may include fields
	  // other than the ones we explicitly parse, below.
	  scoped_ptr<base::Value> options_;

	  // The extension ID of the extension that called speak() and should
	  // receive events.
	  std::string src_extension_id_;

	  // The source extension's ID of this utterance, so that it can associate
	  // events with the appropriate callback.
	  int src_id_;

	  // The URL of the page where the source extension called speak.
	  GURL src_url_;

	  // The delegate to be called when an utterance event is fired.
	  base::WeakPtr<UtteranceEventDelegate> event_delegate_;

	  // The parsed options.
	  std::string voice_name_;
	  std::string lang_;
	  TtsGenderType gender_;
	  UtteranceContinuousParameters continuous_parameters_;
	  bool can_enqueue_;
	  std::set<TtsEventType> required_event_types_;
	  std::set<TtsEventType> desired_event_types_;

	  // The index of the current char being spoken.
	  int char_index_;

	  // True if this utterance received an event indicating it's done.
	  bool finished_;
	};

	// Singleton class that manages text-to-speech for the TTS and TTS engine
	// extension APIs, maintaining a queue of pending utterances and keeping
	// track of all state. Construction and destruction are protected; callers
	// obtain the object through GetInstance().
	class TtsController {
	public:
	// Get the single instance of this class.
	static TtsController* GetInstance();

	// Returns true if we're currently speaking an utterance.
	bool IsSpeaking();

	// Speak the given utterance. If the utterance's can_enqueue flag is true
	// and another utterance is in progress, adds it to the end of the queue.
	// Otherwise, interrupts any current utterance and speaks this one
	// immediately.
	void SpeakOrEnqueue(Utterance* utterance);

	// Stop all utterances and flush the queue. Implies leaving pause mode
	// as well.
	void Stop();

	// Pause the speech queue. Some engines may support pausing in the middle
	// of an utterance.
	void Pause();

	// Resume speaking.
	void Resume();

	// Handle events received from the speech engine. Events are forwarded to
	// the callback function, and in addition, completion and error events
	// trigger finishing the current utterance and starting the next one, if
	// any. |utterance_id| names the utterance the event belongs to (see
	// Utterance::id()).
	void OnTtsEvent(int utterance_id,
	TtsEventType event_type,
	int char_index,
	const std::string& error_message);

	// Return a list of all available voices, including the native voice,
	// if supported, and all voices registered by extensions. The result is
	// delivered through the |out_voices| out-parameter.
	void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);

	// Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it
	// finishes loading the built-in TTS component extension.
	void RetrySpeakingQueuedUtterances();

	// Called by the extension system or platform implementation when the
	// list of voices may have changed and should be re-queried.
	void VoicesChanged();

	// Add a delegate that wants to be notified when the set of voices changes.
	// NOTE(review): delegates are kept as raw pointers in a std::set, so
	// presumably a delegate must be removed before it is destroyed -- confirm.
	void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);

	// Remove delegate that wants to be notified when the set of voices changes.
	void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);

	// For unit testing: inject a platform implementation and inspect the
	// queue length.
	void SetPlatformImpl(TtsPlatformImpl* platform_impl);
	int QueueSize();

	protected:
	TtsController();
	virtual ~TtsController();

	private:
	// Get the platform TTS implementation (or injected mock).
	TtsPlatformImpl* GetPlatformImpl();

	// Start speaking the given utterance. Will either take ownership of
	// |utterance| or delete it if there's an error. The function returns
	// void, so success or failure is not reported through a return value.
	void SpeakNow(Utterance* utterance);

	// Clear the utterance queue. If send_events is true, will send
	// TTS_EVENT_CANCELLED events on each one.
	void ClearUtteranceQueue(bool send_events);

	// Finalize and delete the current utterance.
	void FinishCurrentUtterance();

	// Start speaking the next utterance in the queue.
	void SpeakNextUtterance();

	// Given an utterance and a vector of voices, return the
	// index of the voice that best matches the utterance.
	// NOTE(review): |voices| is taken by non-const reference; whether it is
	// modified, and what is returned when nothing matches, is not visible in
	// this header -- confirm in the definition.
	int GetMatchingVoice(const Utterance* utterance,
	std::vector<VoiceData>& voices);

	// Lets the singleton traits reach the protected constructor/destructor.
	friend struct DefaultSingletonTraits<TtsController>;

	// The current utterance being spoken.
	Utterance* current_utterance_;

	// Whether the queue is paused or not.
	bool paused_;

	// A queue of utterances to speak after the current one finishes.
	std::queue<Utterance*> utterance_queue_;

	// A set of delegates that want to be notified when the voices change.
	std::set<VoicesChangedDelegate*> voices_changed_delegates_;

	// A pointer to the platform implementation of text-to-speech, for
	// dependency injection.
	TtsPlatformImpl* platform_impl_;

	DISALLOW_COPY_AND_ASSIGN(TtsController);
	};

	#endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_