deepgram.clients.speak.client

# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

"""
Unversioned entry point for the Speak (text-to-speech) clients and types.

Every name is imported from the `.v1` package under a `...Latest` alias and is
then re-bound below to its unversioned public name.
"""

# REST: option and source types
from .v1 import (
    # top-level option types
    SpeakOptions as SpeakOptionsLatest,
    SpeakRESTOptions as SpeakRESTOptionsLatest,
    # source types
    TextSource as TextSourceLatest,
    BufferSource as BufferSourceLatest,
    StreamSource as StreamSourceLatest,
    FileSource as FileSourceLatest,
    # source unions specific to Speak
    SpeakSource as SpeakSourceLatest,
    SpeakRestSource as SpeakRestSourceLatest,
    SpeakRESTSource as SpeakRESTSourceLatest,
)

# REST: clients
from .v1 import (
    SpeakRESTClient as SpeakRESTClientLatest,
    AsyncSpeakRESTClient as AsyncSpeakRESTClientLatest,
)

# REST: response type
from .v1 import (
    SpeakRESTResponse as SpeakRESTResponseLatest,
)

# WebSocket: clients (both the long and the abbreviated "WS" names are imported)
from .v1 import (
    SpeakWebSocketClient as SpeakWebSocketClientLatest,
    AsyncSpeakWebSocketClient as AsyncSpeakWebSocketClientLatest,
    SpeakWSClient as SpeakWSClientLatest,
    AsyncSpeakWSClient as AsyncSpeakWSClientLatest,
)

# WebSocket: option type
from .v1 import (
    SpeakWSOptions as SpeakWSOptionsLatest,
)
# WebSocket: response/message types
from .v1 import (
    OpenResponse as OpenResponseLatest,
    SpeakWSMetadataResponse as SpeakWSMetadataResponseLatest,
    FlushedResponse as FlushedResponseLatest,
    ClearedResponse as ClearedResponseLatest,
    CloseResponse as CloseResponseLatest,
    UnhandledResponse as UnhandledResponseLatest,
    WarningResponse as WarningResponseLatest,
    ErrorResponse as ErrorResponseLatest,
)

# The client.py points to the current supported version in the SDK.
# Older versions are supported in the SDK for backwards compatibility.

# REST
# input types
SpeakOptions = SpeakOptionsLatest
SpeakRESTOptions = SpeakRESTOptionsLatest
TextSource = TextSourceLatest
BufferSource = BufferSourceLatest
StreamSource = StreamSourceLatest
FileSource = FileSourceLatest
SpeakSource = SpeakSourceLatest
SpeakRestSource = SpeakRestSourceLatest
SpeakRESTSource = SpeakRESTSourceLatest  # pylint: disable=invalid-name

# output types
SpeakRESTResponse = SpeakRESTResponseLatest

# WebSocket
# input types
SpeakWSOptions = SpeakWSOptionsLatest

# output types
OpenResponse = OpenResponseLatest
SpeakWSMetadataResponse = SpeakWSMetadataResponseLatest
FlushedResponse = FlushedResponseLatest
ClearedResponse = ClearedResponseLatest
CloseResponse = CloseResponseLatest
UnhandledResponse = UnhandledResponseLatest
WarningResponse = WarningResponseLatest
ErrorResponse = ErrorResponseLatest


# backward compatibility: older names bound to the REST response and REST client
SpeakResponse = SpeakRESTResponseLatest
SpeakClient = SpeakRESTClientLatest

# clients
SpeakRESTClient = SpeakRESTClientLatest
AsyncSpeakRESTClient = AsyncSpeakRESTClientLatest
SpeakWSClient = SpeakWSClientLatest
AsyncSpeakWSClient = AsyncSpeakWSClientLatest
SpeakWebSocketClient = SpeakWebSocketClientLatest
AsyncSpeakWebSocketClient = AsyncSpeakWebSocketClientLatest
SpeakOptions = <class 'SpeakRESTOptions'>
@dataclass
class SpeakRESTOptions(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class SpeakRESTOptions(BaseResponse):
    """
    Options for a Speak (text-to-speech) REST request.

    Every field is optional. `model` defaults to "aura-asteria-en"; all other
    fields default to None. Each field carries an `exclude` predicate that is
    true when the value is None.

    Reference:
    https://developers.deepgram.com/reference/text-to-speech-api
    """

    model: Optional[str] = field(
        default="aura-asteria-en",
        metadata=dataclass_config(exclude=lambda f: f is None),
    )
    encoding: Optional[str] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: f is None),
    )
    container: Optional[str] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: f is None),
    )
    sample_rate: Optional[int] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: f is None),
    )
    bit_rate: Optional[int] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: f is None),
    )

    def check(self):
        """
        Validation hook for the options.

        No checks are implemented yet: the method only builds a logger, raises
        its level to ERROR, restores the previous level, and reports success.

        Returns:
            bool: Always True.
        """
        log = verboselogs.VerboseLogger(__name__)
        log.addHandler(logging.StreamHandler())
        saved_level = log.level
        log.setLevel(verboselogs.ERROR)

        # validation rules would go here; none exist at the moment

        log.setLevel(saved_level)
        return True

Contains all the options for a Speak REST request (SpeakOptions is an alias of this class).

Reference: https://developers.deepgram.com/reference/text-to-speech-api

SpeakRESTOptions( model: Optional[str] = 'aura-asteria-en', encoding: Optional[str] = None, container: Optional[str] = None, sample_rate: Optional[int] = None, bit_rate: Optional[int] = None)
model: Optional[str] = 'aura-asteria-en'
encoding: Optional[str] = None
container: Optional[str] = None
sample_rate: Optional[int] = None
bit_rate: Optional[int] = None
def check(self):
43    def check(self):
44        """
45        Check the SpeakOptions for any missing or invalid values.
46        """
47        logger = verboselogs.VerboseLogger(__name__)
48        logger.addHandler(logging.StreamHandler())
49        prev = logger.level
50        logger.setLevel(verboselogs.ERROR)
51
52        # no op at the moment
53
54        logger.setLevel(prev)
55
56        return True

Check the SpeakOptions for any missing or invalid values. No checks are implemented yet, so this currently always returns True.

class TextSource(typing_extensions.TypedDict):
class TextSource(TypedDict):
    """
    Represents a data source for reading text data from a text-like source.

    This class is used to specify a source of text data that can be read from.

    Attributes:
        text (str): A string for reading text data.
    """

    text: str

Represents a data source for reading text data from a text-like source.

This class is used to specify a source of text data that can be read from.

Attributes: text (str): A string for reading text data.

text: str
class BufferSource(typing_extensions.TypedDict):
class BufferSource(TypedDict):
    """
    Represents a data source that holds raw binary data in memory.

    This class is used to specify raw binary data, such as audio data in its
    binary form, which can be captured from a microphone or generated synthetically.

    Attributes:
        buffer (bytes): The binary data.
    """

    buffer: bytes

Represents a data source for handling raw binary data.

This class is used to specify raw binary data, such as audio data in its binary form, which can be captured from a microphone or generated synthetically.

Attributes: buffer (bytes): The binary data.

buffer: bytes
class StreamSource(typing_extensions.TypedDict):
class StreamSource(TypedDict):
    """
    Represents a data source for reading binary data from a stream-like source.

    This class is used to specify a source of binary data that can be read from
    a stream, such as an audio file in .wav format.

    Attributes:
        stream (BufferedReader): An open BufferedReader from which the binary
            data is read.
    """

    stream: BufferedReader

Represents a data source for reading binary data from a stream-like source.

This class is used to specify a source of binary data that can be read from a stream, such as an audio file in .wav format.

Attributes: stream (BufferedReader): A BufferedReader object for reading binary data.

stream: _io.BufferedReader
FileSource = typing.Union[TextSource, BufferSource, StreamSource]
SpeakSource = typing.Union[TextSource, BufferSource, StreamSource, _io.BufferedReader]
SpeakRestSource = typing.Union[TextSource, BufferSource, StreamSource, _io.BufferedReader]
SpeakRESTSource = typing.Union[TextSource, BufferSource, StreamSource, _io.BufferedReader]
@dataclass
class SpeakRESTResponse(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class SpeakRESTResponse(BaseResponse):  # pylint: disable=too-many-instance-attributes
    """
    A class for representing a response from the speak endpoint.

    The string fields default to "" and `characters` defaults to 0.
    `filename`, `stream` and `stream_memory` default to None.
    """

    content_type: str = ""
    request_id: str = ""
    model_uuid: str = ""
    model_name: str = ""
    characters: int = 0
    transfer_encoding: str = ""
    date: str = ""
    # The exclude predicate is true only when the value is None.
    filename: Optional[str] = field(
        default=None, metadata=dataclass_config(exclude=lambda f: f is None)
    )
    # pylint: disable=W0511
    # TODO: stream will be deprecated in a future release. Please use stream_memory instead.
    # The exclude predicate is unconditionally true for this field.
    stream: Optional[io.BytesIO] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: True),
    )
    # pylint: enable=W0511
    # In-memory audio buffer; the exclude predicate is unconditionally true here as well.
    stream_memory: Optional[io.BytesIO] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: True),
    )

A class for representing a response from the speak endpoint.

SpeakRESTResponse( content_type: str = '', request_id: str = '', model_uuid: str = '', model_name: str = '', characters: int = 0, transfer_encoding: str = '', date: str = '', filename: Optional[str] = None, stream: Optional[_io.BytesIO] = None, stream_memory: Optional[_io.BytesIO] = None)
content_type: str = ''
request_id: str = ''
model_uuid: str = ''
model_name: str = ''
characters: int = 0
transfer_encoding: str = ''
date: str = ''
filename: Optional[str] = None
stream: Optional[_io.BytesIO] = None
stream_memory: Optional[_io.BytesIO] = None
@dataclass
class SpeakWSOptions(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class SpeakWSOptions(BaseResponse):
    """
    Options for a Speak (text-to-speech) WebSocket connection.

    Every field is optional. `model` defaults to "aura-asteria-en"; all other
    fields default to None. Each field carries an `exclude` predicate that is
    true when the value is None.

    Reference:
    https://developers.deepgram.com/reference/transform-text-to-speech-websocket
    """

    model: Optional[str] = field(
        default="aura-asteria-en",
        metadata=dataclass_config(exclude=lambda f: f is None),
    )
    encoding: Optional[str] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: f is None),
    )
    # NOTE: the `container` field is disabled (commented out) for these options.
    # container: Optional[str] = field(
    #     default=None, metadata=dataclass_config(exclude=lambda f: f is None)
    # )
    sample_rate: Optional[int] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: f is None),
    )
    bit_rate: Optional[int] = field(
        default=None,
        metadata=dataclass_config(exclude=lambda f: f is None),
    )

    def __getitem__(self, key):
        # Look the key up in the dict form of the options.
        return self.to_dict()[key]

    def __setitem__(self, key, val):
        # Write straight into the instance dictionary.
        vars(self)[key] = val

    def __str__(self) -> str:
        # JSON form of the options, indented by 4.
        return self.to_json(indent=4)

    def check(self):
        """
        Validation hook for the options.

        No checks are implemented yet: the method only builds a logger, raises
        its level to ERROR, restores the previous level, and reports success.

        Returns:
            bool: Always True.
        """
        log = verboselogs.VerboseLogger(__name__)
        log.addHandler(logging.StreamHandler())
        saved_level = log.level
        log.setLevel(verboselogs.ERROR)

        # validation rules would go here; none exist at the moment

        log.setLevel(saved_level)
        return True

Contains all the options for a Speak WebSocket connection (SpeakWSOptions).

Reference: https://developers.deepgram.com/reference/transform-text-to-speech-websocket

SpeakWSOptions( model: Optional[str] = 'aura-asteria-en', encoding: Optional[str] = None, sample_rate: Optional[int] = None, bit_rate: Optional[int] = None)
model: Optional[str] = 'aura-asteria-en'
encoding: Optional[str] = None
sample_rate: Optional[int] = None
bit_rate: Optional[int] = None
def check(self):
53    def check(self):
54        """
55        Check the SpeakOptions for any missing or invalid values.
56        """
57        logger = verboselogs.VerboseLogger(__name__)
58        logger.addHandler(logging.StreamHandler())
59        prev = logger.level
60        logger.setLevel(verboselogs.ERROR)
61
62        # no op at the moment
63
64        logger.setLevel(prev)
65
66        return True

Check the SpeakOptions for any missing or invalid values. No checks are implemented yet, so this currently always returns True.

@dataclass
class OpenResponse(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class OpenResponse(BaseResponse):
    """
    Open Message from the Deepgram Platform.

    Carries a single string field, `type`, which defaults to "".
    """

    type: str = ""

Open Message from the Deepgram Platform

OpenResponse(type: str = '')
type: str = ''
@dataclass
class FlushedResponse(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class FlushedResponse(BaseResponse):
    """
    Flushed Message from the Deepgram Platform.

    Fields: `type` (str, defaults to "") and `sequence_id` (int, defaults to 0).
    """

    type: str = ""
    sequence_id: int = 0

Flushed Message from the Deepgram Platform

FlushedResponse(type: str = '', sequence_id: int = 0)
type: str = ''
sequence_id: int = 0
@dataclass
class ClearedResponse(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class ClearedResponse(BaseResponse):
    """
    Cleared object.

    Fields: `type` (str, defaults to "") and `sequence_id` (int, defaults to 0).
    NOTE(review): presumably the "Cleared" message from the Deepgram Platform,
    by analogy with the other response classes listed here - confirm.
    """

    type: str = ""
    sequence_id: int = 0

Cleared Message from the Deepgram Platform

ClearedResponse(type: str = '', sequence_id: int = 0)
type: str = ''
sequence_id: int = 0
@dataclass
class CloseResponse(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class CloseResponse(BaseResponse):
    """
    Close Message from the Deepgram Platform.

    Carries a single string field, `type`, which defaults to "".
    """

    type: str = ""

Close Message from the Deepgram Platform

CloseResponse(type: str = '')
type: str = ''
@dataclass
class UnhandledResponse(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class UnhandledResponse(BaseResponse):
    """
    Unhandled Message from the Deepgram Platform.

    Fields: `type` and `raw`, both strings defaulting to "".
    """

    type: str = ""
    # presumably the unparsed message text - TODO confirm against the producer
    raw: str = ""

Unhandled Message from the Deepgram Platform

UnhandledResponse(type: str = '', raw: str = '')
type: str = ''
raw: str = ''
@dataclass
class WarningResponse(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class WarningResponse(BaseResponse):
    """
    Warning Message from the Deepgram Platform.

    Fields: `warn_code`, `warn_msg` and `type`, all strings defaulting to "".
    """

    warn_code: str = ""
    warn_msg: str = ""
    type: str = ""

Warning Message from the Deepgram Platform

WarningResponse(warn_code: str = '', warn_msg: str = '', type: str = '')
warn_code: str = ''
warn_msg: str = ''
type: str = ''
@dataclass
class ErrorResponse(deepgram.clients.common.v1.shared_response.BaseResponse):
@dataclass
class ErrorResponse(BaseResponse):
    """
    Error Message from the Deepgram Platform.

    Fields: `description`, `message` and `type` are strings defaulting to "";
    `variant` is optional and defaults to None.
    """

    description: str = ""
    message: str = ""
    type: str = ""
    # The exclude predicate is true only when the value is None.
    variant: Optional[str] = field(
        default=None, metadata=dataclass_config(exclude=lambda f: f is None)
    )

Error Message from the Deepgram Platform

ErrorResponse( description: str = '', message: str = '', type: str = '', variant: Optional[str] = None)
description: str = ''
message: str = ''
type: str = ''
variant: Optional[str] = None
SpeakResponse = <class 'SpeakRESTResponse'>
SpeakClient = <class 'SpeakRESTClient'>
 25class SpeakRESTClient(AbstractSyncRestClient):
 26    """
 27    A client class for doing Text-to-Speech.
 28    Provides methods for speaking from text.
 29    """
 30
 31    _logger: verboselogs.VerboseLogger
 32    _config: DeepgramClientOptions
 33
 34    def __init__(self, config: DeepgramClientOptions):
 35        self._logger = verboselogs.VerboseLogger(__name__)
 36        self._logger.addHandler(logging.StreamHandler())
 37        self._logger.setLevel(config.verbose)
 38        self._config = config
 39        super().__init__(config)
 40
 41    # pylint: disable=too-many-positional-arguments
 42
 43    def stream_raw(
 44        self,
 45        source: FileSource,
 46        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
 47        addons: Optional[Dict] = None,
 48        headers: Optional[Dict] = None,
 49        timeout: Optional[httpx.Timeout] = None,
 50        endpoint: str = "v1/speak",
 51        **kwargs,
 52    ) -> httpx.Response:
 53        """
 54        Speak from a text source and store as a Iterator[byte].
 55
 56        Args:
 57            source (TextSource): The text source to speak.
 58            options (SpeakRESTOptions): Additional options for the ingest (default is None).
 59            addons (Dict): Additional options for the request (default is None).
 60            headers (Dict): Additional headers for the request (default is None).
 61            timeout (httpx.Timeout): The timeout for the request (default is None).
 62            endpoint (str): The endpoint to use for the request (default is "v1/speak").
 63
 64        Returns:
 65            httpx.Response: The direct httpx.Response object from the speak request.
 66            For more information, see https://www.python-httpx.org/api/#response
 67
 68            IMPORTANT: The response object's `close()` method should be called when done
 69            in order to prevent connection leaks.
 70
 71        Raises:
 72            DeepgramTypeError: Raised for known API errors.
 73        """
 74        self._logger.debug("SpeakClient.stream ENTER")
 75
 76        url = f"{self._config.url}/{endpoint}"
 77        if is_text_source(source):
 78            body = source
 79        else:
 80            self._logger.error("Unknown speak source type")
 81            self._logger.debug("SpeakClient.stream LEAVE")
 82            raise DeepgramTypeError("Unknown speak source type")
 83
 84        if isinstance(options, SpeakRESTOptions) and not options.check():
 85            self._logger.error("options.check failed")
 86            self._logger.debug("SpeakClient.stream LEAVE")
 87            raise DeepgramError("Fatal speak options error")
 88
 89        self._logger.info("url: %s", url)
 90        self._logger.info("source: %s", source)
 91        if isinstance(options, SpeakRESTOptions):
 92            self._logger.info("SpeakRESTOptions switching class -> dict")
 93            options = options.to_dict()
 94        self._logger.info("options: %s", options)
 95        self._logger.info("addons: %s", addons)
 96        self._logger.info("headers: %s", headers)
 97
 98        result = self.post_raw(
 99            url,
100            options=options,
101            addons=addons,
102            headers=headers,
103            json=body,
104            timeout=timeout,
105            **kwargs,
106        )
107
108        self._logger.info("result: %s", str(result))
109        self._logger.notice("speak succeeded")
110        self._logger.debug("SpeakClient.stream LEAVE")
111        return result
112
113    def stream_memory(
114        self,
115        source: FileSource,
116        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
117        addons: Optional[Dict] = None,
118        headers: Optional[Dict] = None,
119        timeout: Optional[httpx.Timeout] = None,
120        endpoint: str = "v1/speak",
121        **kwargs,
122    ) -> SpeakRESTResponse:
123        """
124        Speak from a text source and store in memory.
125
126        Args:
127            source (TextSource): The text source to speak.
128            options (SpeakRESTOptions): Additional options for the ingest (default is None).
129            addons (Dict): Additional options for the request (default is None).
130            headers (Dict): Additional headers for the request (default is None).
131            timeout (httpx.Timeout): The timeout for the request (default is None).
132            endpoint (str): The endpoint to use for the request (default is "v1/speak").
133
134        Returns:
135            SpeakRESTResponse: The response from the speak request.
136
137        Raises:
138            DeepgramTypeError: Raised for known API errors.
139        """
140        self._logger.debug("SpeakClient.stream ENTER")
141
142        url = f"{self._config.url}/{endpoint}"
143        if is_text_source(source):
144            body = source
145        else:
146            self._logger.error("Unknown speak source type")
147            self._logger.debug("SpeakClient.stream LEAVE")
148            raise DeepgramTypeError("Unknown speak source type")
149
150        if isinstance(options, SpeakRESTOptions) and not options.check():
151            self._logger.error("options.check failed")
152            self._logger.debug("SpeakClient.stream LEAVE")
153            raise DeepgramError("Fatal speak options error")
154
155        self._logger.info("url: %s", url)
156        self._logger.info("source: %s", source)
157        if isinstance(options, SpeakRESTOptions):
158            self._logger.info("SpeakRESTOptions switching class -> dict")
159            options = options.to_dict()
160        self._logger.info("options: %s", options)
161        self._logger.info("addons: %s", addons)
162        self._logger.info("headers: %s", headers)
163
164        return_vals = [
165            "content-type",
166            "request-id",
167            "model-uuid",
168            "model-name",
169            "char-count",
170            "transfer-encoding",
171            "date",
172        ]
173        result = self.post_memory(
174            url,
175            options=options,
176            addons=addons,
177            headers=headers,
178            json=body,
179            timeout=timeout,
180            file_result=return_vals,
181            **kwargs,
182        )
183
184        self._logger.info("result: %s", result)
185        resp = SpeakRESTResponse(
186            content_type=str(result["content-type"]),
187            request_id=str(result["request-id"]),
188            model_uuid=str(result["model-uuid"]),
189            model_name=str(result["model-name"]),
190            characters=int(str(result["char-count"])),
191            transfer_encoding=str(result["transfer-encoding"]),
192            date=str(result["date"]),
193            stream=cast(io.BytesIO, result["stream"]),
194            stream_memory=cast(io.BytesIO, result["stream"]),
195        )
196        self._logger.verbose("resp Object: %s", resp)
197        self._logger.notice("speak succeeded")
198        self._logger.debug("SpeakClient.stream LEAVE")
199        return resp
200
201    @deprecation.deprecated(
202        deprecated_in="3.4.0",
203        removed_in="4.0.0",
204        current_version=__version__,
205        details="SpeakRESTClient.stream is deprecated. Use SpeakRESTClient.stream_memory instead.",
206    )
207    def stream(
208        self,
209        source: FileSource,
210        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
211        addons: Optional[Dict] = None,
212        headers: Optional[Dict] = None,
213        timeout: Optional[httpx.Timeout] = None,
214        endpoint: str = "v1/speak",
215        **kwargs,
216    ) -> SpeakRESTResponse:
217        """
218        DEPRECATED: stream() is deprecated. Use stream_memory() instead.
219        """
220        return self.stream_memory(
221            source,
222            options=options,
223            addons=addons,
224            headers=headers,
225            timeout=timeout,
226            endpoint=endpoint,
227            **kwargs,
228        )
229
230    async def file(
231        self,
232        filename: str,
233        source: FileSource,
234        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
235        addons: Optional[Dict] = None,
236        timeout: Optional[httpx.Timeout] = None,
237        endpoint: str = "v1/speak",
238        **kwargs,
239    ) -> SpeakRESTResponse:
240        """
241        Speak from a text source and save to a file.
242        """
243        return self.save(
244            filename,
245            source,
246            options=options,
247            addons=addons,
248            timeout=timeout,
249            endpoint=endpoint,
250            **kwargs,
251        )
252
253    def save(
254        self,
255        filename: str,
256        source: FileSource,
257        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
258        addons: Optional[Dict] = None,
259        headers: Optional[Dict] = None,
260        timeout: Optional[httpx.Timeout] = None,
261        endpoint: str = "v1/speak",
262        **kwargs,
263    ) -> SpeakRESTResponse:
264        """
265        Speak from a text source and save to a file.
266
267        Args:
268            source (TextSource): The text source to speak.
269            options (SpeakRESTOptions): Additional options for the ingest (default is None).
270            addons (Dict): Additional options for the request (default is None).
271            headers (Dict): Additional headers for the request (default is None).
272            timeout (httpx.Timeout): The timeout for the request (default is None).
273            endpoint (str): The endpoint to use for the request (default is "v1/speak").
274
275        Returns:
276            SpeakRESTResponse: The response from the speak request.
277
278        Raises:
279            DeepgramTypeError: Raised for known API errors.
280        """
281        self._logger.debug("SpeakClient.save ENTER")
282
283        res = self.stream_memory(
284            source,
285            options=options,
286            addons=addons,
287            headers=headers,
288            timeout=timeout,
289            endpoint=endpoint,
290            **kwargs,
291        )
292
293        if res.stream is None:
294            self._logger.error("stream is None")
295            self._logger.debug("SpeakClient.save LEAVE")
296            raise DeepgramError("BytesIO stream is None")
297
298        # save to file
299        with open(filename, "wb+") as file:
300            file.write(res.stream.getbuffer())
301            file.flush()
302
303        # add filename to response
304        res.stream = None
305        res.filename = filename
306
307        self._logger.debug("SpeakClient.save LEAVE")
308        return res
309
310    # pylint: enable=too-many-positional-arguments

A client class for doing Text-to-Speech. Provides methods for speaking from text.

SpeakRESTClient(config: deepgram.options.DeepgramClientOptions)
    def __init__(self, config: DeepgramClientOptions):
        """
        Create the client.

        Args:
            config (DeepgramClientOptions): Client configuration. `config.verbose`
                sets the log level; the object is also stored on the instance
                and passed to the base class initializer.
        """
        self._logger = verboselogs.VerboseLogger(__name__)
        self._logger.addHandler(logging.StreamHandler())
        self._logger.setLevel(config.verbose)
        self._config = config
        super().__init__(config)
def stream_raw( self, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> httpx.Response:
 43    def stream_raw(
 44        self,
 45        source: FileSource,
 46        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
 47        addons: Optional[Dict] = None,
 48        headers: Optional[Dict] = None,
 49        timeout: Optional[httpx.Timeout] = None,
 50        endpoint: str = "v1/speak",
 51        **kwargs,
 52    ) -> httpx.Response:
 53        """
 54        Speak from a text source and store as a Iterator[byte].
 55
 56        Args:
 57            source (TextSource): The text source to speak.
 58            options (SpeakRESTOptions): Additional options for the ingest (default is None).
 59            addons (Dict): Additional options for the request (default is None).
 60            headers (Dict): Additional headers for the request (default is None).
 61            timeout (httpx.Timeout): The timeout for the request (default is None).
 62            endpoint (str): The endpoint to use for the request (default is "v1/speak").
 63
 64        Returns:
 65            httpx.Response: The direct httpx.Response object from the speak request.
 66            For more information, see https://www.python-httpx.org/api/#response
 67
 68            IMPORTANT: The response object's `close()` method should be called when done
 69            in order to prevent connection leaks.
 70
 71        Raises:
 72            DeepgramTypeError: Raised for known API errors.
 73        """
 74        self._logger.debug("SpeakClient.stream ENTER")
 75
 76        url = f"{self._config.url}/{endpoint}"
 77        if is_text_source(source):
 78            body = source
 79        else:
 80            self._logger.error("Unknown speak source type")
 81            self._logger.debug("SpeakClient.stream LEAVE")
 82            raise DeepgramTypeError("Unknown speak source type")
 83
 84        if isinstance(options, SpeakRESTOptions) and not options.check():
 85            self._logger.error("options.check failed")
 86            self._logger.debug("SpeakClient.stream LEAVE")
 87            raise DeepgramError("Fatal speak options error")
 88
 89        self._logger.info("url: %s", url)
 90        self._logger.info("source: %s", source)
 91        if isinstance(options, SpeakRESTOptions):
 92            self._logger.info("SpeakRESTOptions switching class -> dict")
 93            options = options.to_dict()
 94        self._logger.info("options: %s", options)
 95        self._logger.info("addons: %s", addons)
 96        self._logger.info("headers: %s", headers)
 97
 98        result = self.post_raw(
 99            url,
100            options=options,
101            addons=addons,
102            headers=headers,
103            json=body,
104            timeout=timeout,
105            **kwargs,
106        )
107
108        self._logger.info("result: %s", str(result))
109        self._logger.notice("speak succeeded")
110        self._logger.debug("SpeakClient.stream LEAVE")
111        return result

Speak from a text source and return the raw httpx.Response, to be consumed as an Iterator[bytes].

Args: source (TextSource): The text source to speak. options (SpeakRESTOptions): Additional options for the ingest (default is None). addons (Dict): Additional options for the request (default is None). headers (Dict): Additional headers for the request (default is None). timeout (httpx.Timeout): The timeout for the request (default is None). endpoint (str): The endpoint to use for the request (default is "v1/speak").

Returns: httpx.Response: The direct httpx.Response object from the speak request. For more information, see https://www.python-httpx.org/api/#response

IMPORTANT: The response object's `close()` method should be called when done
in order to prevent connection leaks.

Raises: DeepgramTypeError: Raised when the source is not a text source. DeepgramError: Raised when options.check() fails.

def stream_memory( self, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> SpeakRESTResponse:
113    def stream_memory(
114        self,
115        source: FileSource,
116        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
117        addons: Optional[Dict] = None,
118        headers: Optional[Dict] = None,
119        timeout: Optional[httpx.Timeout] = None,
120        endpoint: str = "v1/speak",
121        **kwargs,
122    ) -> SpeakRESTResponse:
123        """
124        Speak from a text source and store in memory.
125
126        Args:
127            source (TextSource): The text source to speak.
128            options (SpeakRESTOptions): Additional options for the ingest (default is None).
129            addons (Dict): Additional options for the request (default is None).
130            headers (Dict): Additional headers for the request (default is None).
131            timeout (httpx.Timeout): The timeout for the request (default is None).
132            endpoint (str): The endpoint to use for the request (default is "v1/speak").
133
134        Returns:
135            SpeakRESTResponse: The response from the speak request.
136
137        Raises:
138            DeepgramTypeError: Raised for known API errors.
139        """
140        self._logger.debug("SpeakClient.stream ENTER")
141
142        url = f"{self._config.url}/{endpoint}"
143        if is_text_source(source):
144            body = source
145        else:
146            self._logger.error("Unknown speak source type")
147            self._logger.debug("SpeakClient.stream LEAVE")
148            raise DeepgramTypeError("Unknown speak source type")
149
150        if isinstance(options, SpeakRESTOptions) and not options.check():
151            self._logger.error("options.check failed")
152            self._logger.debug("SpeakClient.stream LEAVE")
153            raise DeepgramError("Fatal speak options error")
154
155        self._logger.info("url: %s", url)
156        self._logger.info("source: %s", source)
157        if isinstance(options, SpeakRESTOptions):
158            self._logger.info("SpeakRESTOptions switching class -> dict")
159            options = options.to_dict()
160        self._logger.info("options: %s", options)
161        self._logger.info("addons: %s", addons)
162        self._logger.info("headers: %s", headers)
163
164        return_vals = [
165            "content-type",
166            "request-id",
167            "model-uuid",
168            "model-name",
169            "char-count",
170            "transfer-encoding",
171            "date",
172        ]
173        result = self.post_memory(
174            url,
175            options=options,
176            addons=addons,
177            headers=headers,
178            json=body,
179            timeout=timeout,
180            file_result=return_vals,
181            **kwargs,
182        )
183
184        self._logger.info("result: %s", result)
185        resp = SpeakRESTResponse(
186            content_type=str(result["content-type"]),
187            request_id=str(result["request-id"]),
188            model_uuid=str(result["model-uuid"]),
189            model_name=str(result["model-name"]),
190            characters=int(str(result["char-count"])),
191            transfer_encoding=str(result["transfer-encoding"]),
192            date=str(result["date"]),
193            stream=cast(io.BytesIO, result["stream"]),
194            stream_memory=cast(io.BytesIO, result["stream"]),
195        )
196        self._logger.verbose("resp Object: %s", resp)
197        self._logger.notice("speak succeeded")
198        self._logger.debug("SpeakClient.stream LEAVE")
199        return resp

Speak from a text source and store in memory.

Args: source (TextSource): The text source to speak. options (SpeakRESTOptions): Additional options for the ingest (default is None). addons (Dict): Additional options for the request (default is None). headers (Dict): Additional headers for the request (default is None). timeout (httpx.Timeout): The timeout for the request (default is None). endpoint (str): The endpoint to use for the request (default is "v1/speak").

Returns: SpeakRESTResponse: The response from the speak request.

Raises: DeepgramTypeError: Raised for known API errors.

@deprecation.deprecated(deprecated_in='3.4.0', removed_in='4.0.0', current_version=__version__, details='SpeakRESTClient.stream is deprecated. Use SpeakRESTClient.stream_memory instead.')
def stream( self, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> SpeakRESTResponse:
201    @deprecation.deprecated(
202        deprecated_in="3.4.0",
203        removed_in="4.0.0",
204        current_version=__version__,
205        details="SpeakRESTClient.stream is deprecated. Use SpeakRESTClient.stream_memory instead.",
206    )
207    def stream(
208        self,
209        source: FileSource,
210        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
211        addons: Optional[Dict] = None,
212        headers: Optional[Dict] = None,
213        timeout: Optional[httpx.Timeout] = None,
214        endpoint: str = "v1/speak",
215        **kwargs,
216    ) -> SpeakRESTResponse:
217        """
218        DEPRECATED: stream() is deprecated. Use stream_memory() instead.
219        """
220        return self.stream_memory(
221            source,
222            options=options,
223            addons=addons,
224            headers=headers,
225            timeout=timeout,
226            endpoint=endpoint,
227            **kwargs,
228        )

DEPRECATED: stream() is deprecated. Use stream_memory() instead.

async def file( self, filename: str, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> SpeakRESTResponse:
230    async def file(
231        self,
232        filename: str,
233        source: FileSource,
234        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
235        addons: Optional[Dict] = None,
236        timeout: Optional[httpx.Timeout] = None,
237        endpoint: str = "v1/speak",
238        **kwargs,
239    ) -> SpeakRESTResponse:
240        """
241        Speak from a text source and save to a file.
242        """
243        return self.save(
244            filename,
245            source,
246            options=options,
247            addons=addons,
248            timeout=timeout,
249            endpoint=endpoint,
250            **kwargs,
251        )

Speak from a text source and save to a file.

def save( self, filename: str, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> SpeakRESTResponse:
253    def save(
254        self,
255        filename: str,
256        source: FileSource,
257        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
258        addons: Optional[Dict] = None,
259        headers: Optional[Dict] = None,
260        timeout: Optional[httpx.Timeout] = None,
261        endpoint: str = "v1/speak",
262        **kwargs,
263    ) -> SpeakRESTResponse:
264        """
265        Speak from a text source and save to a file.
266
267        Args:
268            source (TextSource): The text source to speak.
269            options (SpeakRESTOptions): Additional options for the ingest (default is None).
270            addons (Dict): Additional options for the request (default is None).
271            headers (Dict): Additional headers for the request (default is None).
272            timeout (httpx.Timeout): The timeout for the request (default is None).
273            endpoint (str): The endpoint to use for the request (default is "v1/speak").
274
275        Returns:
276            SpeakRESTResponse: The response from the speak request.
277
278        Raises:
279            DeepgramTypeError: Raised for known API errors.
280        """
281        self._logger.debug("SpeakClient.save ENTER")
282
283        res = self.stream_memory(
284            source,
285            options=options,
286            addons=addons,
287            headers=headers,
288            timeout=timeout,
289            endpoint=endpoint,
290            **kwargs,
291        )
292
293        if res.stream is None:
294            self._logger.error("stream is None")
295            self._logger.debug("SpeakClient.save LEAVE")
296            raise DeepgramError("BytesIO stream is None")
297
298        # save to file
299        with open(filename, "wb+") as file:
300            file.write(res.stream.getbuffer())
301            file.flush()
302
303        # add filename to response
304        res.stream = None
305        res.filename = filename
306
307        self._logger.debug("SpeakClient.save LEAVE")
308        return res

Speak from a text source and save to a file.

Args: source (TextSource): The text source to speak. options (SpeakRESTOptions): Additional options for the ingest (default is None). addons (Dict): Additional options for the request (default is None). headers (Dict): Additional headers for the request (default is None). timeout (httpx.Timeout): The timeout for the request (default is None). endpoint (str): The endpoint to use for the request (default is "v1/speak").

Returns: SpeakRESTResponse: The response from the speak request.

Raises: DeepgramTypeError: Raised for known API errors.

 26class AsyncSpeakRESTClient(AbstractAsyncRestClient):
 27    """
 28    A client class for doing Text-to-Speech.
 29    Provides methods for speaking from text.
 30    """
 31
 32    _logger: verboselogs.VerboseLogger
 33    _config: DeepgramClientOptions
 34
 35    def __init__(self, config: DeepgramClientOptions):
 36        self._logger = verboselogs.VerboseLogger(__name__)
 37        self._logger.addHandler(logging.StreamHandler())
 38        self._logger.setLevel(config.verbose)
 39        self._config = config
 40        super().__init__(config)
 41
 42    # pylint: disable=too-many-positional-arguments
 43
 44    async def stream_raw(
 45        self,
 46        source: FileSource,
 47        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
 48        addons: Optional[Dict] = None,
 49        headers: Optional[Dict] = None,
 50        timeout: Optional[httpx.Timeout] = None,
 51        endpoint: str = "v1/speak",
 52        **kwargs,
 53    ) -> httpx.Response:
 54        """
 55        Speak from a text source and store as a Iterator[byte].
 56
 57        Args:
 58            source (TextSource): The text source to speak.
 59            options (SpeakRESTOptions): Additional options for the ingest (default is None).
 60            addons (Dict): Additional options for the request (default is None).
 61            headers (Dict): Additional headers for the request (default is None).
 62            timeout (httpx.Timeout): The timeout for the request (default is None).
 63            endpoint (str): The endpoint to use for the request (default is "v1/speak").
 64
 65        Returns:
 66            httpx.Response: The direct httpx.Response object from the speak request.
 67            For more information, see https://www.python-httpx.org/api/#response
 68
 69            IMPORTANT: The response object's `close()` method should be called when done
 70            in order to prevent connection leaks.
 71
 72        Raises:
 73            DeepgramTypeError: Raised for known API errors.
 74        """
 75        self._logger.debug("AsyncSpeakClient.stream ENTER")
 76
 77        url = f"{self._config.url}/{endpoint}"
 78        if is_text_source(source):
 79            body = source
 80        else:
 81            self._logger.error("Unknown speak source type")
 82            self._logger.debug("AsyncSpeakClient.stream LEAVE")
 83            raise DeepgramTypeError("Unknown speak source type")
 84
 85        if isinstance(options, SpeakRESTOptions) and not options.check():
 86            self._logger.error("options.check failed")
 87            self._logger.debug("AsyncSpeakClient.stream LEAVE")
 88            raise DeepgramError("Fatal speak options error")
 89
 90        self._logger.info("url: %s", url)
 91        self._logger.info("source: %s", source)
 92        if isinstance(options, SpeakRESTOptions):
 93            self._logger.info("SpeakRESTOptions switching class -> dict")
 94            options = options.to_dict()
 95        self._logger.info("options: %s", options)
 96        self._logger.info("addons: %s", addons)
 97        self._logger.info("headers: %s", headers)
 98
 99        result = await self.post_raw(
100            url,
101            options=options,
102            addons=addons,
103            headers=headers,
104            json=body,
105            timeout=timeout,
106            **kwargs,
107        )
108
109        self._logger.info("result: %s", str(result))
110        self._logger.notice("speak succeeded")
111        self._logger.debug("AsyncSpeakClient.stream LEAVE")
112        return result
113
114    async def stream_memory(
115        self,
116        source: FileSource,
117        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
118        addons: Optional[Dict] = None,
119        headers: Optional[Dict] = None,
120        timeout: Optional[httpx.Timeout] = None,
121        endpoint: str = "v1/speak",
122        **kwargs,
123    ) -> SpeakRESTResponse:
124        """
125        Speak from a text source and store in memory.
126
127        Args:
128            source (TextSource): The text source to speak.
129            options (SpeakRESTOptions): Additional options for the ingest (default is None).
130            addons (Dict): Additional options for the request (default is None).
131            headers (Dict): Additional headers for the request (default is None).
132            timeout (httpx.Timeout): The timeout for the request (default is None).
133            endpoint (str): The endpoint to use for the request (default is "v1/speak").
134
135        Returns:
136            SpeakRESTResponse: The response from the speak request.
137
138        Raises:
139            DeepgramTypeError: Raised for known API errors.
140        """
141        self._logger.debug("AsyncSpeakClient.stream ENTER")
142
143        url = f"{self._config.url}/{endpoint}"
144        if is_text_source(source):
145            body = source
146        else:
147            self._logger.error("Unknown speak source type")
148            self._logger.debug("AsyncSpeakClient.stream LEAVE")
149            raise DeepgramTypeError("Unknown speak source type")
150
151        if isinstance(options, SpeakRESTOptions) and not options.check():
152            self._logger.error("options.check failed")
153            self._logger.debug("AsyncSpeakClient.stream LEAVE")
154            raise DeepgramError("Fatal speak options error")
155
156        self._logger.info("url: %s", url)
157        self._logger.info("source: %s", source)
158        if isinstance(options, SpeakRESTOptions):
159            self._logger.info("SpeakRESTOptions switching class -> dict")
160            options = options.to_dict()
161        self._logger.info("options: %s", options)
162        self._logger.info("addons: %s", addons)
163        self._logger.info("headers: %s", headers)
164
165        return_vals = [
166            "content-type",
167            "request-id",
168            "model-uuid",
169            "model-name",
170            "char-count",
171            "transfer-encoding",
172            "date",
173        ]
174        result = await self.post_memory(
175            url,
176            options=options,
177            addons=addons,
178            headers=headers,
179            json=body,
180            timeout=timeout,
181            file_result=return_vals,
182            **kwargs,
183        )
184        self._logger.info("result: %s", result)
185        resp = SpeakRESTResponse(
186            content_type=str(result["content-type"]),
187            request_id=str(result["request-id"]),
188            model_uuid=str(result["model-uuid"]),
189            model_name=str(result["model-name"]),
190            characters=int(str(result["char-count"])),
191            transfer_encoding=str(result["transfer-encoding"]),
192            date=str(result["date"]),
193            stream=cast(io.BytesIO, result["stream"]),
194            stream_memory=cast(io.BytesIO, result["stream"]),
195        )
196        self._logger.verbose("resp Object: %s", str(resp))
197        self._logger.notice("speak succeeded")
198        self._logger.debug("AsyncSpeakClient.stream LEAVE")
199        return resp
200
201    @deprecation.deprecated(
202        deprecated_in="3.4.0",
203        removed_in="4.0.0",
204        current_version=__version__,
205        details="SpeakRESTClient.stream is deprecated. Use SpeakRESTClient.stream_memory instead.",
206    )
207    async def stream(
208        self,
209        source: FileSource,
210        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
211        addons: Optional[Dict] = None,
212        headers: Optional[Dict] = None,
213        timeout: Optional[httpx.Timeout] = None,
214        endpoint: str = "v1/speak",
215        **kwargs,
216    ) -> SpeakRESTResponse:
217        """
218        DEPRECATED: stream() is deprecated. Use stream_memory() instead.
219        """
220        return await self.stream_memory(
221            source,
222            options=options,
223            addons=addons,
224            headers=headers,
225            timeout=timeout,
226            endpoint=endpoint,
227            **kwargs,
228        )
229
230    async def file(
231        self,
232        filename: str,
233        source: FileSource,
234        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
235        addons: Optional[Dict] = None,
236        timeout: Optional[httpx.Timeout] = None,
237        endpoint: str = "v1/speak",
238        **kwargs,
239    ) -> SpeakRESTResponse:
240        """
241        Speak from a text source and save to a file.
242        """
243        return await self.save(
244            filename,
245            source,
246            options=options,
247            addons=addons,
248            timeout=timeout,
249            endpoint=endpoint,
250            **kwargs,
251        )
252
253    async def save(
254        self,
255        filename: str,
256        source: FileSource,
257        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
258        addons: Optional[Dict] = None,
259        headers: Optional[Dict] = None,
260        timeout: Optional[httpx.Timeout] = None,
261        endpoint: str = "v1/speak",
262        **kwargs,
263    ) -> SpeakRESTResponse:
264        """
265        Speak from a text source and save to a file.
266
267        Args:
268            source (TextSource): The text source to speak.
269            options (SpeakRESTOptions): Additional options for the ingest (default is None).
270            addons (Dict): Additional options for the request (default is None).
271            headers (Dict): Additional headers for the request (default is None).
272            timeout (httpx.Timeout): The timeout for the request (default is None).
273            endpoint (str): The endpoint to use for the request (default is "v1/speak").
274
275        Returns:
276            SpeakRESTResponse: The response from the speak request.
277
278        Raises:
279            DeepgramTypeError: Raised for known API errors.
280        """
281        self._logger.debug("AsyncSpeakClient.save ENTER")
282
283        res = await self.stream_memory(
284            source,
285            options=options,
286            addons=addons,
287            headers=headers,
288            timeout=timeout,
289            endpoint=endpoint,
290            **kwargs,
291        )
292
293        if res.stream is None:
294            self._logger.error("stream is None")
295            self._logger.debug("AsyncSpeakClient.save LEAVE")
296            raise DeepgramError("BytesIO stream is None")
297
298        # save to file
299        async with aiofiles.open(filename, "wb") as out:
300            await out.write(res.stream.getbuffer())
301            await out.flush()
302
303        # add filename to response
304        res.stream = None
305        res.filename = filename
306
307        self._logger.debug("AsyncSpeakClient.save LEAVE")
308        return res
309
310    # pylint: enable=too-many-positional-arguments

A client class for doing Text-to-Speech. Provides methods for speaking from text.

AsyncSpeakRESTClient(config: deepgram.options.DeepgramClientOptions)
35    def __init__(self, config: DeepgramClientOptions):
36        self._logger = verboselogs.VerboseLogger(__name__)
37        self._logger.addHandler(logging.StreamHandler())
38        self._logger.setLevel(config.verbose)
39        self._config = config
40        super().__init__(config)
async def stream_raw( self, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> httpx.Response:
 44    async def stream_raw(
 45        self,
 46        source: FileSource,
 47        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
 48        addons: Optional[Dict] = None,
 49        headers: Optional[Dict] = None,
 50        timeout: Optional[httpx.Timeout] = None,
 51        endpoint: str = "v1/speak",
 52        **kwargs,
 53    ) -> httpx.Response:
 54        """
 55        Speak from a text source and store as a Iterator[byte].
 56
 57        Args:
 58            source (TextSource): The text source to speak.
 59            options (SpeakRESTOptions): Additional options for the ingest (default is None).
 60            addons (Dict): Additional options for the request (default is None).
 61            headers (Dict): Additional headers for the request (default is None).
 62            timeout (httpx.Timeout): The timeout for the request (default is None).
 63            endpoint (str): The endpoint to use for the request (default is "v1/speak").
 64
 65        Returns:
 66            httpx.Response: The direct httpx.Response object from the speak request.
 67            For more information, see https://www.python-httpx.org/api/#response
 68
 69            IMPORTANT: The response object's `close()` method should be called when done
 70            in order to prevent connection leaks.
 71
 72        Raises:
 73            DeepgramTypeError: Raised for known API errors.
 74        """
 75        self._logger.debug("AsyncSpeakClient.stream ENTER")
 76
 77        url = f"{self._config.url}/{endpoint}"
 78        if is_text_source(source):
 79            body = source
 80        else:
 81            self._logger.error("Unknown speak source type")
 82            self._logger.debug("AsyncSpeakClient.stream LEAVE")
 83            raise DeepgramTypeError("Unknown speak source type")
 84
 85        if isinstance(options, SpeakRESTOptions) and not options.check():
 86            self._logger.error("options.check failed")
 87            self._logger.debug("AsyncSpeakClient.stream LEAVE")
 88            raise DeepgramError("Fatal speak options error")
 89
 90        self._logger.info("url: %s", url)
 91        self._logger.info("source: %s", source)
 92        if isinstance(options, SpeakRESTOptions):
 93            self._logger.info("SpeakRESTOptions switching class -> dict")
 94            options = options.to_dict()
 95        self._logger.info("options: %s", options)
 96        self._logger.info("addons: %s", addons)
 97        self._logger.info("headers: %s", headers)
 98
 99        result = await self.post_raw(
100            url,
101            options=options,
102            addons=addons,
103            headers=headers,
104            json=body,
105            timeout=timeout,
106            **kwargs,
107        )
108
109        self._logger.info("result: %s", str(result))
110        self._logger.notice("speak succeeded")
111        self._logger.debug("AsyncSpeakClient.stream LEAVE")
112        return result

Speak from a text source and store as an Iterator[byte].

Args: source (TextSource): The text source to speak. options (SpeakRESTOptions): Additional options for the ingest (default is None). addons (Dict): Additional options for the request (default is None). headers (Dict): Additional headers for the request (default is None). timeout (httpx.Timeout): The timeout for the request (default is None). endpoint (str): The endpoint to use for the request (default is "v1/speak").

Returns: httpx.Response: The direct httpx.Response object from the speak request. For more information, see https://www.python-httpx.org/api/#response

IMPORTANT: The response object's `close()` method should be called when done
in order to prevent connection leaks.

Raises: DeepgramTypeError: Raised for known API errors.

async def stream_memory( self, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> SpeakRESTResponse:
114    async def stream_memory(
115        self,
116        source: FileSource,
117        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
118        addons: Optional[Dict] = None,
119        headers: Optional[Dict] = None,
120        timeout: Optional[httpx.Timeout] = None,
121        endpoint: str = "v1/speak",
122        **kwargs,
123    ) -> SpeakRESTResponse:
124        """
125        Speak from a text source and store in memory.
126
127        Args:
128            source (TextSource): The text source to speak.
129            options (SpeakRESTOptions): Additional options for the ingest (default is None).
130            addons (Dict): Additional options for the request (default is None).
131            headers (Dict): Additional headers for the request (default is None).
132            timeout (httpx.Timeout): The timeout for the request (default is None).
133            endpoint (str): The endpoint to use for the request (default is "v1/speak").
134
135        Returns:
136            SpeakRESTResponse: The response from the speak request.
137
138        Raises:
139            DeepgramTypeError: Raised for known API errors.
140        """
141        self._logger.debug("AsyncSpeakClient.stream ENTER")
142
143        url = f"{self._config.url}/{endpoint}"
144        if is_text_source(source):
145            body = source
146        else:
147            self._logger.error("Unknown speak source type")
148            self._logger.debug("AsyncSpeakClient.stream LEAVE")
149            raise DeepgramTypeError("Unknown speak source type")
150
151        if isinstance(options, SpeakRESTOptions) and not options.check():
152            self._logger.error("options.check failed")
153            self._logger.debug("AsyncSpeakClient.stream LEAVE")
154            raise DeepgramError("Fatal speak options error")
155
156        self._logger.info("url: %s", url)
157        self._logger.info("source: %s", source)
158        if isinstance(options, SpeakRESTOptions):
159            self._logger.info("SpeakRESTOptions switching class -> dict")
160            options = options.to_dict()
161        self._logger.info("options: %s", options)
162        self._logger.info("addons: %s", addons)
163        self._logger.info("headers: %s", headers)
164
165        return_vals = [
166            "content-type",
167            "request-id",
168            "model-uuid",
169            "model-name",
170            "char-count",
171            "transfer-encoding",
172            "date",
173        ]
174        result = await self.post_memory(
175            url,
176            options=options,
177            addons=addons,
178            headers=headers,
179            json=body,
180            timeout=timeout,
181            file_result=return_vals,
182            **kwargs,
183        )
184        self._logger.info("result: %s", result)
185        resp = SpeakRESTResponse(
186            content_type=str(result["content-type"]),
187            request_id=str(result["request-id"]),
188            model_uuid=str(result["model-uuid"]),
189            model_name=str(result["model-name"]),
190            characters=int(str(result["char-count"])),
191            transfer_encoding=str(result["transfer-encoding"]),
192            date=str(result["date"]),
193            stream=cast(io.BytesIO, result["stream"]),
194            stream_memory=cast(io.BytesIO, result["stream"]),
195        )
196        self._logger.verbose("resp Object: %s", str(resp))
197        self._logger.notice("speak succeeded")
198        self._logger.debug("AsyncSpeakClient.stream LEAVE")
199        return resp

Speak from a text source and store in memory.

Args: source (TextSource): The text source to speak. options (SpeakRESTOptions): Additional options for the ingest (default is None). addons (Dict): Additional options for the request (default is None). headers (Dict): Additional headers for the request (default is None). timeout (httpx.Timeout): The timeout for the request (default is None). endpoint (str): The endpoint to use for the request (default is "v1/speak").

Returns: SpeakRESTResponse: The response from the speak request.

Raises: DeepgramTypeError: Raised for known API errors.

@deprecation.deprecated(deprecated_in='3.4.0', removed_in='4.0.0', current_version=__version__, details='SpeakRESTClient.stream is deprecated. Use SpeakRESTClient.stream_memory instead.')
async def stream( self, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> SpeakRESTResponse:
201    @deprecation.deprecated(
202        deprecated_in="3.4.0",
203        removed_in="4.0.0",
204        current_version=__version__,
205        details="SpeakRESTClient.stream is deprecated. Use SpeakRESTClient.stream_memory instead.",
206    )
207    async def stream(
208        self,
209        source: FileSource,
210        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
211        addons: Optional[Dict] = None,
212        headers: Optional[Dict] = None,
213        timeout: Optional[httpx.Timeout] = None,
214        endpoint: str = "v1/speak",
215        **kwargs,
216    ) -> SpeakRESTResponse:
217        """
218        DEPRECATED: stream() is deprecated. Use stream_memory() instead.
219        """
220        return await self.stream_memory(
221            source,
222            options=options,
223            addons=addons,
224            headers=headers,
225            timeout=timeout,
226            endpoint=endpoint,
227            **kwargs,
228        )

DEPRECATED: stream() is deprecated. Use stream_memory() instead.

async def file( self, filename: str, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> SpeakRESTResponse:
230    async def file(
231        self,
232        filename: str,
233        source: FileSource,
234        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
235        addons: Optional[Dict] = None,
236        timeout: Optional[httpx.Timeout] = None,
237        endpoint: str = "v1/speak",
238        **kwargs,
239    ) -> SpeakRESTResponse:
240        """
241        Speak from a text source and save to a file.
242        """
243        return await self.save(
244            filename,
245            source,
246            options=options,
247            addons=addons,
248            timeout=timeout,
249            endpoint=endpoint,
250            **kwargs,
251        )

Speak from a text source and save to a file.

async def save( self, filename: str, source: Union[TextSource, BufferSource, StreamSource], options: Union[Dict, SpeakRESTOptions, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = 'v1/speak', **kwargs) -> SpeakRESTResponse:
253    async def save(
254        self,
255        filename: str,
256        source: FileSource,
257        options: Optional[Union[Dict, SpeakRESTOptions]] = None,
258        addons: Optional[Dict] = None,
259        headers: Optional[Dict] = None,
260        timeout: Optional[httpx.Timeout] = None,
261        endpoint: str = "v1/speak",
262        **kwargs,
263    ) -> SpeakRESTResponse:
264        """
265        Speak from a text source and save to a file.
266
267        Args:
268            source (TextSource): The text source to speak.
269            options (SpeakRESTOptions): Additional options for the ingest (default is None).
270            addons (Dict): Additional options for the request (default is None).
271            headers (Dict): Additional headers for the request (default is None).
272            timeout (httpx.Timeout): The timeout for the request (default is None).
273            endpoint (str): The endpoint to use for the request (default is "v1/speak").
274
275        Returns:
276            SpeakRESTResponse: The response from the speak request.
277
278        Raises:
279            DeepgramTypeError: Raised for known API errors.
280        """
281        self._logger.debug("AsyncSpeakClient.save ENTER")
282
283        res = await self.stream_memory(
284            source,
285            options=options,
286            addons=addons,
287            headers=headers,
288            timeout=timeout,
289            endpoint=endpoint,
290            **kwargs,
291        )
292
293        if res.stream is None:
294            self._logger.error("stream is None")
295            self._logger.debug("AsyncSpeakClient.save LEAVE")
296            raise DeepgramError("BytesIO stream is None")
297
298        # save to file
299        async with aiofiles.open(filename, "wb") as out:
300            await out.write(res.stream.getbuffer())
301            await out.flush()
302
303        # add filename to response
304        res.stream = None
305        res.filename = filename
306
307        self._logger.debug("AsyncSpeakClient.save LEAVE")
308        return res

Speak from a text source and save to a file.

Args: filename (str): The path of the file the synthesized audio is written to. source (TextSource): The text source to speak. options (SpeakRESTOptions): Additional options for the ingest (default is None). addons (Dict): Additional options for the request (default is None). headers (Dict): Additional headers for the request (default is None). timeout (httpx.Timeout): The timeout for the request (default is None). endpoint (str): The endpoint to use for the request (default is "v1/speak").

Returns: SpeakRESTResponse: The response from the speak request.

Raises: DeepgramTypeError: Raised for known API errors.

 40class SpeakWSClient(
 41    AbstractSyncWebSocketClient
 42):  # pylint: disable=too-many-instance-attributes
 43    """
 44    Client for interacting with Deepgram's text-to-speech services over WebSockets.
 45
 46     This class provides methods to establish a WebSocket connection for TTS synthesis and handle real-time TTS synthesis events.
 47
 48     Args:
 49         config (DeepgramClientOptions): all the options for the client.
 50    """
 51
 52    _logger: verboselogs.VerboseLogger
 53    _config: DeepgramClientOptions
 54    _endpoint: str
 55
 56    _event_handlers: Dict[SpeakWebSocketEvents, list]
 57
 58    _flush_thread: Union[threading.Thread, None]
 59    _lock_flush: threading.Lock
 60    _last_datagram: Optional[datetime] = None
 61    _flush_count: int
 62
 63    _kwargs: Optional[Dict] = None
 64    _addons: Optional[Dict] = None
 65    _options: Optional[Dict] = None
 66    _headers: Optional[Dict] = None
 67
 68    _speaker_created: bool = False
 69    _speaker: Optional[Speaker] = None
 70    _microphone: Optional[Microphone] = None
 71
 72    def __init__(
 73        self, config: DeepgramClientOptions, microphone: Optional[Microphone] = None
 74    ):
 75        if config is None:
 76            raise DeepgramError("Config is required")
 77
 78        self._logger = verboselogs.VerboseLogger(__name__)
 79        self._logger.addHandler(logging.StreamHandler())
 80        self._logger.setLevel(config.verbose)
 81
 82        self._config = config
 83        self._endpoint = "v1/speak"
 84        self._lock_flush = threading.Lock()
 85
 86        self._flush_thread = None
 87
 88        # auto flush
 89        self._last_datagram = None
 90        self._flush_count = 0
 91
 92        # microphone
 93        self._microphone = microphone
 94
 95        # init handlers
 96        self._event_handlers = {
 97            event: [] for event in SpeakWebSocketEvents.__members__.values()
 98        }
 99
100        if self._config.options.get("speaker_playback") == "true":
101            self._logger.info("speaker_playback is enabled")
102            rate = self._config.options.get("speaker_playback_rate")
103            if rate is None:
104                rate = RATE
105            channels = self._config.options.get("speaker_playback_channels")
106            if channels is None:
107                channels = CHANNELS
108            playback_delta_in_ms = self._config.options.get(
109                "speaker_playback_delta_in_ms"
110            )
111            if playback_delta_in_ms is None:
112                playback_delta_in_ms = PLAYBACK_DELTA
113            device_index = self._config.options.get("speaker_playback_device_index")
114
115            self._logger.debug("rate: %s", rate)
116            self._logger.debug("channels: %s", channels)
117            self._logger.debug("device_index: %s", device_index)
118
119            self._speaker_created = True
120
121            if device_index is not None:
122                self._speaker = Speaker(
123                    rate=rate,
124                    channels=channels,
125                    last_play_delta_in_ms=playback_delta_in_ms,
126                    verbose=self._config.verbose,
127                    output_device_index=device_index,
128                    microphone=self._microphone,
129                )
130            else:
131                self._speaker = Speaker(
132                    rate=rate,
133                    channels=channels,
134                    last_play_delta_in_ms=playback_delta_in_ms,
135                    verbose=self._config.verbose,
136                    microphone=self._microphone,
137                )
138
139        # call the parent constructor
140        super().__init__(self._config, self._endpoint)
141
142    # pylint: disable=too-many-statements,too-many-branches
143    def start(
144        self,
145        options: Optional[Union[SpeakWSOptions, Dict]] = None,
146        addons: Optional[Dict] = None,
147        headers: Optional[Dict] = None,
148        members: Optional[Dict] = None,
149        **kwargs,
150    ) -> bool:
151        """
152        Starts the WebSocket connection for text-to-speech synthesis.
153        """
154        self._logger.debug("SpeakWebSocketClient.start ENTER")
155        self._logger.info("options: %s", options)
156        self._logger.info("addons: %s", addons)
157        self._logger.info("headers: %s", headers)
158        self._logger.info("members: %s", members)
159        self._logger.info("kwargs: %s", kwargs)
160
161        if isinstance(options, SpeakWSOptions) and not options.check():
162            self._logger.error("options.check failed")
163            self._logger.debug("SpeakWebSocketClient.start LEAVE")
164            raise DeepgramError("Fatal text-to-speech options error")
165
166        self._addons = addons
167        self._headers = headers
168
169        # add "members" as members of the class
170        if members is not None:
171            self.__dict__.update(members)
172
173        # set kwargs as members of the class
174        if kwargs is not None:
175            self._kwargs = kwargs
176        else:
177            self._kwargs = {}
178
179        if isinstance(options, SpeakWSOptions):
180            self._logger.info("SpeakWSOptions switching class -> dict")
181            self._options = options.to_dict()
182        elif options is not None:
183            self._options = options
184        else:
185            self._options = {}
186
187        try:
188            # speaker substitutes the listening thread
189            if self._speaker is not None:
190                self._logger.notice("passing speaker to delegate_listening")
191                super().delegate_listening(self._speaker)
192
193            # call parent start
194            if (
195                super().start(
196                    self._options,
197                    self._addons,
198                    self._headers,
199                    **dict(cast(Dict[Any, Any], self._kwargs)),
200                )
201                is False
202            ):
203                self._logger.error("SpeakWebSocketClient.start failed")
204                self._logger.debug("SpeakWebSocketClient.start LEAVE")
205                return False
206
207            if self._speaker is not None:
208                self._logger.notice("start delegate_listening thread")
209                self._speaker.start()
210
211            # debug the threads
212            for thread in threading.enumerate():
213                self._logger.debug("after running thread: %s", thread.name)
214            self._logger.debug("number of active threads: %s", threading.active_count())
215
216            # flush thread
217            if self._config.is_auto_flush_speak_enabled():
218                self._logger.notice("autoflush is enabled")
219                self._flush_thread = threading.Thread(target=self._flush)
220                self._flush_thread.start()
221            else:
222                self._logger.notice("autoflush is disabled")
223
224            # debug the threads
225            for thread in threading.enumerate():
226                self._logger.debug("after running thread: %s", thread.name)
227            self._logger.debug("number of active threads: %s", threading.active_count())
228
229            self._logger.notice("start succeeded")
230            self._logger.debug("SpeakWebSocketClient.start LEAVE")
231            return True
232
233        except Exception as e:  # pylint: disable=broad-except
234            self._logger.error(
235                "WebSocketException in SpeakWebSocketClient.start: %s", e
236            )
237            self._logger.debug("SpeakWebSocketClient.start LEAVE")
238            if self._config.options.get("termination_exception_connect") is True:
239                raise
240            return False
241
242    # pylint: enable=too-many-statements,too-many-branches
243
244    def on(self, event: SpeakWebSocketEvents, handler: Callable) -> None:
245        """
246        Registers event handlers for specific events.
247        """
248        self._logger.info("event subscribed: %s", event)
249        if event in SpeakWebSocketEvents.__members__.values() and callable(handler):
250            self._event_handlers[event].append(handler)
251
252    def _emit(self, event: SpeakWebSocketEvents, *args, **kwargs) -> None:
253        """
254        Emits events to the registered event handlers.
255        """
256        self._logger.debug("SpeakWebSocketClient._emit ENTER")
257        self._logger.debug("callback handlers for: %s", event)
258
259        # debug the threads
260        for thread in threading.enumerate():
261            self._logger.debug("after running thread: %s", thread.name)
262        self._logger.debug("number of active threads: %s", threading.active_count())
263
264        self._logger.debug("callback handlers for: %s", event)
265        for handler in self._event_handlers[event]:
266            handler(self, *args, **kwargs)
267
268        # debug the threads
269        for thread in threading.enumerate():
270            self._logger.debug("after running thread: %s", thread.name)
271        self._logger.debug("number of active threads: %s", threading.active_count())
272
273        self._logger.debug("ListenWebSocketClient._emit LEAVE")
274
275    def _process_text(self, message: str) -> None:
276        """
277        Processes messages received over the WebSocket connection.
278        """
279        self._logger.debug("SpeakWebSocketClient._process_text ENTER")
280
281        try:
282            self._logger.debug("Text data received")
283
284            if len(message) == 0:
285                self._logger.debug("message is empty")
286                self._logger.debug("SpeakWebSocketClient._process_text LEAVE")
287                return
288
289            data = json.loads(message)
290            response_type = data.get("type")
291            self._logger.debug("response_type: %s, data: %s", response_type, data)
292
293            match response_type:
294                case SpeakWebSocketEvents.Open:
295                    open_result: OpenResponse = OpenResponse.from_json(message)
296                    self._logger.verbose("OpenResponse: %s", open_result)
297                    self._emit(
298                        SpeakWebSocketEvents(SpeakWebSocketEvents.Open),
299                        open=open_result,
300                        **dict(cast(Dict[Any, Any], self._kwargs)),
301                    )
302                case SpeakWebSocketEvents.Metadata:
303                    meta_result: MetadataResponse = MetadataResponse.from_json(message)
304                    self._logger.verbose("MetadataResponse: %s", meta_result)
305                    self._emit(
306                        SpeakWebSocketEvents(SpeakWebSocketEvents.Metadata),
307                        metadata=meta_result,
308                        **dict(cast(Dict[Any, Any], self._kwargs)),
309                    )
310                case SpeakWebSocketEvents.Flushed:
311                    fl_result: FlushedResponse = FlushedResponse.from_json(message)
312                    self._logger.verbose("FlushedResponse: %s", fl_result)
313
314                    # auto flush
315                    if self._config.is_inspecting_speak():
316                        with self._lock_flush:
317                            self._flush_count -= 1
318                            self._logger.debug(
319                                "Decrement Flush count: %d",
320                                self._flush_count,
321                            )
322
323                    self._emit(
324                        SpeakWebSocketEvents(SpeakWebSocketEvents.Flushed),
325                        flushed=fl_result,
326                        **dict(cast(Dict[Any, Any], self._kwargs)),
327                    )
328                case SpeakWebSocketEvents.Cleared:
329                    clear_result: ClearedResponse = ClearedResponse.from_json(message)
330                    self._logger.verbose("ClearedResponse: %s", clear_result)
331                    self._emit(
332                        SpeakWebSocketEvents(SpeakWebSocketEvents.Cleared),
333                        cleared=clear_result,
334                        **dict(cast(Dict[Any, Any], self._kwargs)),
335                    )
336                case SpeakWebSocketEvents.Close:
337                    close_result: CloseResponse = CloseResponse.from_json(message)
338                    self._logger.verbose("CloseResponse: %s", close_result)
339                    self._emit(
340                        SpeakWebSocketEvents(SpeakWebSocketEvents.Close),
341                        close=close_result,
342                        **dict(cast(Dict[Any, Any], self._kwargs)),
343                    )
344                case SpeakWebSocketEvents.Warning:
345                    war_warning: WarningResponse = WarningResponse.from_json(message)
346                    self._logger.verbose("WarningResponse: %s", war_warning)
347                    self._emit(
348                        SpeakWebSocketEvents(SpeakWebSocketEvents.Warning),
349                        warning=war_warning,
350                        **dict(cast(Dict[Any, Any], self._kwargs)),
351                    )
352                case SpeakWebSocketEvents.Error:
353                    err_error: ErrorResponse = ErrorResponse.from_json(message)
354                    self._logger.verbose("ErrorResponse: %s", err_error)
355                    self._emit(
356                        SpeakWebSocketEvents(SpeakWebSocketEvents.Error),
357                        error=err_error,
358                        **dict(cast(Dict[Any, Any], self._kwargs)),
359                    )
360                case _:
361                    self._logger.warning(
362                        "Unknown Message: response_type: %s, data: %s",
363                        response_type,
364                        data,
365                    )
366                    unhandled_error: UnhandledResponse = UnhandledResponse(
367                        type=SpeakWebSocketEvents(SpeakWebSocketEvents.Unhandled),
368                        raw=message,
369                    )
370                    self._emit(
371                        SpeakWebSocketEvents(SpeakWebSocketEvents.Unhandled),
372                        unhandled=unhandled_error,
373                        **dict(cast(Dict[Any, Any], self._kwargs)),
374                    )
375
376            self._logger.notice("_process_text Succeeded")
377            self._logger.debug("SpeakWebSocketClient._process_text LEAVE")
378
379        except Exception as e:  # pylint: disable=broad-except
380            self._logger.error("Exception in SpeakWebSocketClient._process_text: %s", e)
381            e_error: ErrorResponse = ErrorResponse(
382                "Exception in SpeakWebSocketClient._process_text",
383                f"{e}",
384                "Exception",
385            )
386            self._logger.error(
387                "Exception in SpeakWebSocketClient._process_text: %s", str(e)
388            )
389            self._emit(
390                SpeakWebSocketEvents(SpeakWebSocketEvents.Error),
391                e_error,
392                **dict(cast(Dict[Any, Any], self._kwargs)),
393            )
394
395            # signal exit and close
396            super()._signal_exit()
397
398            self._logger.debug("SpeakWebSocketClient._process_text LEAVE")
399
400            if self._config.options.get("termination_exception") is True:
401                raise
402            return
403
404    # pylint: enable=too-many-return-statements,too-many-statements
405
406    def _process_binary(self, message: bytes) -> None:
407        self._logger.debug("SpeakWebSocketClient._process_binary ENTER")
408        self._logger.debug("Binary data received")
409
410        self._emit(
411            SpeakWebSocketEvents(SpeakWebSocketEvents.AudioData),
412            data=message,
413            **dict(cast(Dict[Any, Any], self._kwargs)),
414        )
415
416        self._logger.notice("_process_binary Succeeded")
417        self._logger.debug("SpeakWebSocketClient._process_binary LEAVE")
418
419    # pylint: disable=too-many-return-statements
420    def _flush(self) -> None:
421        self._logger.debug("SpeakWebSocketClient._flush ENTER")
422
423        delta_in_ms_str = self._config.options.get("auto_flush_speak_delta")
424        if delta_in_ms_str is None:
425            self._logger.error("auto_flush_speak_delta is None")
426            self._logger.debug("SpeakWebSocketClient._flush LEAVE")
427            return
428        delta_in_ms = float(delta_in_ms_str)
429
430        while True:
431            try:
432                self._exit_event.wait(timeout=HALF_SECOND)
433
434                if self._exit_event.is_set():
435                    self._logger.notice("_flush exiting gracefully")
436                    self._logger.debug("ListenWebSocketClient._flush LEAVE")
437                    return
438
439                if self._last_datagram is None:
440                    self._logger.debug("AutoFlush last_datagram is None")
441                    continue
442
443                with self._lock_flush:
444                    delta = datetime.now() - self._last_datagram
445                    diff_in_ms = delta.total_seconds() * 1000
446                    self._logger.debug("AutoFlush delta: %f", diff_in_ms)
447                    if diff_in_ms < delta_in_ms:
448                        self._logger.debug("AutoFlush delta is less than threshold")
449                        continue
450
451                self.flush()
452
453            except Exception as e:  # pylint: disable=broad-except
454                self._logger.error("Exception in SpeakWebSocketClient._flush: %s", e)
455                e_error: ErrorResponse = ErrorResponse(
456                    "Exception in SpeakWebSocketClient._flush",
457                    f"{e}",
458                    "Exception",
459                )
460                self._logger.error(
461                    "Exception in SpeakWebSocketClient._flush: %s", str(e)
462                )
463                self._emit(
464                    SpeakWebSocketEvents(SpeakWebSocketEvents.Error),
465                    error=e_error,
466                    **dict(cast(Dict[Any, Any], self._kwargs)),
467                )
468
469                # signal exit and close
470                super()._signal_exit()
471
472                self._logger.debug("SpeakWebSocketClient._flush LEAVE")
473
474                if self._config.options.get("termination_exception") is True:
475                    raise
476                return
477
478    # pylint: enable=too-many-return-statements
479
480    def send_text(self, text_input: str) -> bool:
481        """
482        Sends text to the WebSocket connection to generate audio.
483
484        Args:
485            text_input (str): The raw text to be synthesized. This function will automatically wrap
486                the text in a JSON object of type "Speak" with the key "text".
487
488        Returns:
489            bool: True if the text was successfully sent, False otherwise.
490        """
491        return self.send_raw(json.dumps({"type": "Speak", "text": text_input}))
492
493    def send(self, data: Union[str, bytes]) -> bool:
494        """
495        Alias for send_text. Please see send_text for more information.
496        """
497        if isinstance(data, bytes):
498            self._logger.error("send() failed - data is bytes")
499            return False
500
501        return self.send_text(data)
502
503    # pylint: disable=unused-argument
504    def send_control(
505        self, msg_type: Union[SpeakWebSocketMessage, str], data: Optional[str] = ""
506    ) -> bool:
507        """
508        Sends a control message consisting of type SpeakWebSocketEvents over the WebSocket connection.
509
510        Args:
511            msg_type (SpeakWebSocketEvents): The type of control message to send.
512            (Optional) data (str): The data to send with the control message.
513
514        Returns:
515            bool: True if the control message was successfully sent, False otherwise.
516        """
517        control_msg = json.dumps({"type": msg_type})
518        return self.send_raw(control_msg)
519
520    # pylint: enable=unused-argument
521
522    # pylint: disable=too-many-return-statements,too-many-branches,too-many-statements
523    def send_raw(self, msg: str) -> bool:
524        """
525        Sends a raw/control message over the WebSocket connection. This message must contain a valid JSON object.
526
527        Args:
528            msg (str): The raw message to send over the WebSocket connection.
529
530        Returns:
531            bool: True if the message was successfully sent, False otherwise.
532        """
533        self._logger.spam("SpeakWebSocketClient.send_raw ENTER")
534
535        if self._config.is_inspecting_speak():
536            try:
537                _tmp_json = json.loads(msg)
538                if "type" in _tmp_json:
539                    self._logger.debug(
540                        "Inspecting Message: Sending %s", _tmp_json["type"]
541                    )
542                    match _tmp_json["type"]:
543                        case SpeakWebSocketMessage.Speak:
544                            inspect_res = self._inspect()
545                            if not inspect_res:
546                                self._logger.error("inspect_res failed")
547                        case SpeakWebSocketMessage.Flush:
548                            with self._lock_flush:
549                                self._last_datagram = None
550                                self._flush_count += 1
551                                self._logger.debug(
552                                    "Increment Flush count: %d", self._flush_count
553                                )
554            except Exception as e:  # pylint: disable=broad-except
555                self._logger.error("send_raw() failed - Exception: %s", str(e))
556
557        try:
558            if super().send(msg) is False:
559                self._logger.error("send_raw() failed")
560                self._logger.spam("SpeakWebSocketClient.send_raw LEAVE")
561                return False
562            self._logger.spam("send_raw() succeeded")
563            self._logger.spam("SpeakWebSocketClient.send_raw LEAVE")
564            return True
565        except Exception as e:  # pylint: disable=broad-except
566            self._logger.error("send_raw() failed - Exception: %s", str(e))
567            self._logger.spam("SpeakWebSocketClient.send_raw LEAVE")
568            if self._config.options.get("termination_exception_send") is True:
569                raise
570            return False
571
572    # pylint: enable=too-many-return-statements,too-many-branches
573
574    def flush(self) -> bool:
575        """
576        Flushes the current buffer and returns generated audio
577        """
578        self._logger.spam("SpeakWebSocketClient.flush ENTER")
579
580        self._logger.notice("Sending Flush...")
581        ret = self.send_control(SpeakWebSocketMessage.Flush)
582
583        if not ret:
584            self._logger.error("flush failed")
585            self._logger.spam("SpeakWebSocketClient.flush LEAVE")
586            return False
587
588        self._logger.notice("flush succeeded")
589        self._logger.spam("SpeakWebSocketClient.flush LEAVE")
590
591        return True
592
593    def clear(self) -> bool:
594        """
595        Clears the current buffer on the server
596        """
597        self._logger.spam("SpeakWebSocketClient.clear ENTER")
598
599        self._logger.notice("Sending Clear...")
600        ret = self.send_control(SpeakWebSocketMessage.Clear)
601
602        if not ret:
603            self._logger.error("clear failed")
604            self._logger.spam("SpeakWebSocketClient.clear LEAVE")
605            return False
606
607        self._logger.notice("clear succeeded")
608        self._logger.spam("SpeakWebSocketClient.clear LEAVE")
609
610        return True
611
612    def wait_for_complete(self):
613        """
614        This method will block until the speak is done playing sound.
615        """
616        self._logger.spam("SpeakWebSocketClient.wait_for_complete ENTER")
617
618        if self._speaker is None:
619            self._logger.error("speaker is None. Return immediately")
620            raise DeepgramError("Speaker is not initialized")
621
622        self._speaker.wait_for_complete()
623        self._logger.notice("wait_for_complete succeeded")
624        self._logger.spam("SpeakWebSocketClient.wait_for_complete LEAVE")
625
626    def _close_message(self) -> bool:
627        return self.send_control(SpeakWebSocketMessage.Close)
628
629    # closes the WebSocket connection gracefully
630    def finish(self) -> bool:
631        """
632        Closes the WebSocket connection gracefully.
633        """
634        self._logger.spam("SpeakWebSocketClient.finish ENTER")
635
636        # call parent finish which calls signal_exit
637        if super().finish() is False:
638            self._logger.error("ListenWebSocketClient.finish failed")
639
640        if self._speaker is not None and self._speaker_created:
641            self._speaker.finish()
642            self._speaker_created = False
643
644        # debug the threads
645        for thread in threading.enumerate():
646            self._logger.debug("before running thread: %s", thread.name)
647        self._logger.debug("number of active threads: %s", threading.active_count())
648
649        # stop the threads
650        if self._speaker is not None:
651            self._logger.verbose("stopping speaker...")
652            self._speaker.finish()
653            self._speaker = None
654            self._logger.notice("speaker stopped")
655
656        if self._flush_thread is not None:
657            self._logger.verbose("sdtopping _flush_thread...")
658            self._flush_thread.join()
659            self._flush_thread = None
660            self._logger.notice("_flush_thread joined")
661
662        # debug the threads
663        for thread in threading.enumerate():
664            self._logger.debug("before running thread: %s", thread.name)
665        self._logger.debug("number of active threads: %s", threading.active_count())
666
667        self._logger.notice("finish succeeded")
668        self._logger.spam("SpeakWebSocketClient.finish LEAVE")
669        return True
670
671    def _inspect(self) -> bool:
672        # auto flush_inspect is generically used to track any messages you might want to snoop on
673        # place additional logic here to inspect messages of interest
674
675        # for auto flush functionality
676        # set the last datagram
677        with self._lock_flush:
678            self._last_datagram = datetime.now()
679            self._logger.debug(
680                "AutoFlush last received: %s",
681                str(self._last_datagram),
682            )
683
684        return True

Client for interacting with Deepgram's text-to-speech services over WebSockets.

This class provides methods to establish a WebSocket connection for TTS synthesis and handle real-time TTS synthesis events.

Args: config (DeepgramClientOptions): all the options for the client.

SpeakWSClient( config: deepgram.options.DeepgramClientOptions, microphone: Optional[deepgram.audio.microphone.microphone.Microphone] = None)
 72    def __init__(
 73        self, config: DeepgramClientOptions, microphone: Optional[Microphone] = None
 74    ):
 75        if config is None:
 76            raise DeepgramError("Config is required")
 77
 78        self._logger = verboselogs.VerboseLogger(__name__)
 79        self._logger.addHandler(logging.StreamHandler())
 80        self._logger.setLevel(config.verbose)
 81
 82        self._config = config
 83        self._endpoint = "v1/speak"
 84        self._lock_flush = threading.Lock()
 85
 86        self._flush_thread = None
 87
 88        # auto flush
 89        self._last_datagram = None
 90        self._flush_count = 0
 91
 92        # microphone
 93        self._microphone = microphone
 94
 95        # init handlers
 96        self._event_handlers = {
 97            event: [] for event in SpeakWebSocketEvents.__members__.values()
 98        }
 99
100        if self._config.options.get("speaker_playback") == "true":
101            self._logger.info("speaker_playback is enabled")
102            rate = self._config.options.get("speaker_playback_rate")
103            if rate is None:
104                rate = RATE
105            channels = self._config.options.get("speaker_playback_channels")
106            if channels is None:
107                channels = CHANNELS
108            playback_delta_in_ms = self._config.options.get(
109                "speaker_playback_delta_in_ms"
110            )
111            if playback_delta_in_ms is None:
112                playback_delta_in_ms = PLAYBACK_DELTA
113            device_index = self._config.options.get("speaker_playback_device_index")
114
115            self._logger.debug("rate: %s", rate)
116            self._logger.debug("channels: %s", channels)
117            self._logger.debug("device_index: %s", device_index)
118
119            self._speaker_created = True
120
121            if device_index is not None:
122                self._speaker = Speaker(
123                    rate=rate,
124                    channels=channels,
125                    last_play_delta_in_ms=playback_delta_in_ms,
126                    verbose=self._config.verbose,
127                    output_device_index=device_index,
128                    microphone=self._microphone,
129                )
130            else:
131                self._speaker = Speaker(
132                    rate=rate,
133                    channels=channels,
134                    last_play_delta_in_ms=playback_delta_in_ms,
135                    verbose=self._config.verbose,
136                    microphone=self._microphone,
137                )
138
139        # call the parent constructor
140        super().__init__(self._config, self._endpoint)
def start( self, options: Union[SpeakWSOptions, Dict, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, members: Optional[Dict] = None, **kwargs) -> bool:
143    def start(
144        self,
145        options: Optional[Union[SpeakWSOptions, Dict]] = None,
146        addons: Optional[Dict] = None,
147        headers: Optional[Dict] = None,
148        members: Optional[Dict] = None,
149        **kwargs,
150    ) -> bool:
151        """
152        Starts the WebSocket connection for text-to-speech synthesis.
153        """
154        self._logger.debug("SpeakWebSocketClient.start ENTER")
155        self._logger.info("options: %s", options)
156        self._logger.info("addons: %s", addons)
157        self._logger.info("headers: %s", headers)
158        self._logger.info("members: %s", members)
159        self._logger.info("kwargs: %s", kwargs)
160
161        if isinstance(options, SpeakWSOptions) and not options.check():
162            self._logger.error("options.check failed")
163            self._logger.debug("SpeakWebSocketClient.start LEAVE")
164            raise DeepgramError("Fatal text-to-speech options error")
165
166        self._addons = addons
167        self._headers = headers
168
169        # add "members" as members of the class
170        if members is not None:
171            self.__dict__.update(members)
172
173        # set kwargs as members of the class
174        if kwargs is not None:
175            self._kwargs = kwargs
176        else:
177            self._kwargs = {}
178
179        if isinstance(options, SpeakWSOptions):
180            self._logger.info("SpeakWSOptions switching class -> dict")
181            self._options = options.to_dict()
182        elif options is not None:
183            self._options = options
184        else:
185            self._options = {}
186
187        try:
188            # speaker substitutes the listening thread
189            if self._speaker is not None:
190                self._logger.notice("passing speaker to delegate_listening")
191                super().delegate_listening(self._speaker)
192
193            # call parent start
194            if (
195                super().start(
196                    self._options,
197                    self._addons,
198                    self._headers,
199                    **dict(cast(Dict[Any, Any], self._kwargs)),
200                )
201                is False
202            ):
203                self._logger.error("SpeakWebSocketClient.start failed")
204                self._logger.debug("SpeakWebSocketClient.start LEAVE")
205                return False
206
207            if self._speaker is not None:
208                self._logger.notice("start delegate_listening thread")
209                self._speaker.start()
210
211            # debug the threads
212            for thread in threading.enumerate():
213                self._logger.debug("after running thread: %s", thread.name)
214            self._logger.debug("number of active threads: %s", threading.active_count())
215
216            # flush thread
217            if self._config.is_auto_flush_speak_enabled():
218                self._logger.notice("autoflush is enabled")
219                self._flush_thread = threading.Thread(target=self._flush)
220                self._flush_thread.start()
221            else:
222                self._logger.notice("autoflush is disabled")
223
224            # debug the threads
225            for thread in threading.enumerate():
226                self._logger.debug("after running thread: %s", thread.name)
227            self._logger.debug("number of active threads: %s", threading.active_count())
228
229            self._logger.notice("start succeeded")
230            self._logger.debug("SpeakWebSocketClient.start LEAVE")
231            return True
232
233        except Exception as e:  # pylint: disable=broad-except
234            self._logger.error(
235                "WebSocketException in SpeakWebSocketClient.start: %s", e
236            )
237            self._logger.debug("SpeakWebSocketClient.start LEAVE")
238            if self._config.options.get("termination_exception_connect") is True:
239                raise
240            return False

Starts the WebSocket connection for text-to-speech synthesis.

def on( self, event: deepgram.clients.speak.enums.SpeakWebSocketEvents, handler: Callable) -> None:
244    def on(self, event: SpeakWebSocketEvents, handler: Callable) -> None:
245        """
246        Registers event handlers for specific events.
247        """
248        self._logger.info("event subscribed: %s", event)
249        if event in SpeakWebSocketEvents.__members__.values() and callable(handler):
250            self._event_handlers[event].append(handler)

Registers event handlers for specific events.

def send_text(self, text_input: str) -> bool:
480    def send_text(self, text_input: str) -> bool:
481        """
482        Sends text to the WebSocket connection to generate audio.
483
484        Args:
485            text_input (str): The raw text to be synthesized. This function will automatically wrap
486                the text in a JSON object of type "Speak" with the key "text".
487
488        Returns:
489            bool: True if the text was successfully sent, False otherwise.
490        """
491        return self.send_raw(json.dumps({"type": "Speak", "text": text_input}))

Sends text to the WebSocket connection to generate audio.

Args: text_input (str): The raw text to be synthesized. This function will automatically wrap the text in a JSON object of type "Speak" with the key "text".

Returns: bool: True if the text was successfully sent, False otherwise.

def send(self, data: Union[str, bytes]) -> bool:
493    def send(self, data: Union[str, bytes]) -> bool:
494        """
495        Alias for send_text. Please see send_text for more information.
496        """
497        if isinstance(data, bytes):
498            self._logger.error("send() failed - data is bytes")
499            return False
500
501        return self.send_text(data)

Alias for send_text. Please see send_text for more information.

def send_control( self, msg_type: Union[deepgram.clients.speak.enums.SpeakWebSocketMessage, str], data: Optional[str] = '') -> bool:
504    def send_control(
505        self, msg_type: Union[SpeakWebSocketMessage, str], data: Optional[str] = ""
506    ) -> bool:
507        """
508        Sends a control message consisting of type SpeakWebSocketEvents over the WebSocket connection.
509
510        Args:
511            msg_type (SpeakWebSocketEvents): The type of control message to send.
512            (Optional) data (str): The data to send with the control message.
513
514        Returns:
515            bool: True if the control message was successfully sent, False otherwise.
516        """
517        control_msg = json.dumps({"type": msg_type})
518        return self.send_raw(control_msg)

Sends a control message of type SpeakWebSocketMessage over the WebSocket connection.

Args: msg_type (SpeakWebSocketMessage or str): The type of control message to send. (Optional) data (str): The data to send with the control message.

Returns: bool: True if the control message was successfully sent, False otherwise.

def send_raw(self, msg: str) -> bool:
523    def send_raw(self, msg: str) -> bool:
524        """
525        Sends a raw/control message over the WebSocket connection. This message must contain a valid JSON object.
526
527        Args:
528            msg (str): The raw message to send over the WebSocket connection.
529
530        Returns:
531            bool: True if the message was successfully sent, False otherwise.
532        """
533        self._logger.spam("SpeakWebSocketClient.send_raw ENTER")
534
535        if self._config.is_inspecting_speak():
536            try:
537                _tmp_json = json.loads(msg)
538                if "type" in _tmp_json:
539                    self._logger.debug(
540                        "Inspecting Message: Sending %s", _tmp_json["type"]
541                    )
542                    match _tmp_json["type"]:
543                        case SpeakWebSocketMessage.Speak:
544                            inspect_res = self._inspect()
545                            if not inspect_res:
546                                self._logger.error("inspect_res failed")
547                        case SpeakWebSocketMessage.Flush:
548                            with self._lock_flush:
549                                self._last_datagram = None
550                                self._flush_count += 1
551                                self._logger.debug(
552                                    "Increment Flush count: %d", self._flush_count
553                                )
554            except Exception as e:  # pylint: disable=broad-except
555                self._logger.error("send_raw() failed - Exception: %s", str(e))
556
557        try:
558            if super().send(msg) is False:
559                self._logger.error("send_raw() failed")
560                self._logger.spam("SpeakWebSocketClient.send_raw LEAVE")
561                return False
562            self._logger.spam("send_raw() succeeded")
563            self._logger.spam("SpeakWebSocketClient.send_raw LEAVE")
564            return True
565        except Exception as e:  # pylint: disable=broad-except
566            self._logger.error("send_raw() failed - Exception: %s", str(e))
567            self._logger.spam("SpeakWebSocketClient.send_raw LEAVE")
568            if self._config.options.get("termination_exception_send") is True:
569                raise
570            return False

Sends a raw/control message over the WebSocket connection. This message must contain a valid JSON object.

Args: msg (str): The raw message to send over the WebSocket connection.

Returns: bool: True if the message was successfully sent, False otherwise.

def flush(self) -> bool:
574    def flush(self) -> bool:
575        """
576        Flushes the current buffer and returns generated audio
577        """
578        self._logger.spam("SpeakWebSocketClient.flush ENTER")
579
580        self._logger.notice("Sending Flush...")
581        ret = self.send_control(SpeakWebSocketMessage.Flush)
582
583        if not ret:
584            self._logger.error("flush failed")
585            self._logger.spam("SpeakWebSocketClient.flush LEAVE")
586            return False
587
588        self._logger.notice("flush succeeded")
589        self._logger.spam("SpeakWebSocketClient.flush LEAVE")
590
591        return True

Flushes the current buffer and returns generated audio

def clear(self) -> bool:
593    def clear(self) -> bool:
594        """
595        Clears the current buffer on the server
596        """
597        self._logger.spam("SpeakWebSocketClient.clear ENTER")
598
599        self._logger.notice("Sending Clear...")
600        ret = self.send_control(SpeakWebSocketMessage.Clear)
601
602        if not ret:
603            self._logger.error("clear failed")
604            self._logger.spam("SpeakWebSocketClient.clear LEAVE")
605            return False
606
607        self._logger.notice("clear succeeded")
608        self._logger.spam("SpeakWebSocketClient.clear LEAVE")
609
610        return True

Clears the current buffer on the server

def wait_for_complete(self):
612    def wait_for_complete(self):
613        """
614        This method will block until the speak is done playing sound.
615        """
616        self._logger.spam("SpeakWebSocketClient.wait_for_complete ENTER")
617
618        if self._speaker is None:
619            self._logger.error("speaker is None. Return immediately")
620            raise DeepgramError("Speaker is not initialized")
621
622        self._speaker.wait_for_complete()
623        self._logger.notice("wait_for_complete succeeded")
624        self._logger.spam("SpeakWebSocketClient.wait_for_complete LEAVE")

This method will block until the speaker is done playing sound.

def finish(self) -> bool:
630    def finish(self) -> bool:
631        """
632        Closes the WebSocket connection gracefully.
633        """
634        self._logger.spam("SpeakWebSocketClient.finish ENTER")
635
636        # call parent finish which calls signal_exit
637        if super().finish() is False:
638            self._logger.error("ListenWebSocketClient.finish failed")
639
640        if self._speaker is not None and self._speaker_created:
641            self._speaker.finish()
642            self._speaker_created = False
643
644        # debug the threads
645        for thread in threading.enumerate():
646            self._logger.debug("before running thread: %s", thread.name)
647        self._logger.debug("number of active threads: %s", threading.active_count())
648
649        # stop the threads
650        if self._speaker is not None:
651            self._logger.verbose("stopping speaker...")
652            self._speaker.finish()
653            self._speaker = None
654            self._logger.notice("speaker stopped")
655
656        if self._flush_thread is not None:
657            self._logger.verbose("sdtopping _flush_thread...")
658            self._flush_thread.join()
659            self._flush_thread = None
660            self._logger.notice("_flush_thread joined")
661
662        # debug the threads
663        for thread in threading.enumerate():
664            self._logger.debug("before running thread: %s", thread.name)
665        self._logger.debug("number of active threads: %s", threading.active_count())
666
667        self._logger.notice("finish succeeded")
668        self._logger.spam("SpeakWebSocketClient.finish LEAVE")
669        return True

Closes the WebSocket connection gracefully.

 40class AsyncSpeakWSClient(
 41    AbstractAsyncWebSocketClient
 42):  # pylint: disable=too-many-instance-attributes
 43    """
 44    Client for interacting with Deepgram's text-to-speech services over WebSockets.
 45
 46     This class provides methods to establish a WebSocket connection for TTS synthesis and handle real-time TTS synthesis events.
 47
 48     Args:
 49         config (DeepgramClientOptions): all the options for the client.
 50    """
 51
 52    _logger: verboselogs.VerboseLogger
 53    _config: DeepgramClientOptions
 54    _endpoint: str
 55
 56    _event_handlers: Dict[SpeakWebSocketEvents, list]
 57
 58    _flush_thread: Union[asyncio.Task, None]
 59    _last_datagram: Optional[datetime] = None
 60    _flush_count: int
 61
 62    _kwargs: Optional[Dict] = None
 63    _addons: Optional[Dict] = None
 64    _options: Optional[Dict] = None
 65    _headers: Optional[Dict] = None
 66
 67    _speaker_created: bool = False
 68    _speaker: Optional[Speaker] = None
 69    _microphone: Optional[Microphone] = None
 70
 71    def __init__(
 72        self, config: DeepgramClientOptions, microphone: Optional[Microphone] = None
 73    ):
 74        if config is None:
 75            raise DeepgramError("Config is required")
 76        self._logger = verboselogs.VerboseLogger(__name__)
 77        self._logger.addHandler(logging.StreamHandler())
 78        self._logger.setLevel(config.verbose)
 79
 80        self._config = config
 81        self._endpoint = "v1/speak"
 82
 83        self._flush_thread = None
 84
 85        # auto flush
 86        self._last_datagram = None
 87        self._flush_count = 0
 88
 89        # microphone
 90        self._microphone = microphone
 91
 92        # init handlers
 93        self._event_handlers = {
 94            event: [] for event in SpeakWebSocketEvents.__members__.values()
 95        }
 96
 97        if self._config.options.get("speaker_playback") == "true":
 98            self._logger.info("speaker_playback is enabled")
 99            rate = self._config.options.get("speaker_playback_rate")
100            if rate is None:
101                rate = RATE
102            channels = self._config.options.get("speaker_playback_channels")
103            if channels is None:
104                channels = CHANNELS
105            playback_delta_in_ms = self._config.options.get(
106                "speaker_playback_delta_in_ms"
107            )
108            if playback_delta_in_ms is None:
109                playback_delta_in_ms = PLAYBACK_DELTA
110            device_index = self._config.options.get("speaker_playback_device_index")
111
112            self._logger.debug("rate: %s", rate)
113            self._logger.debug("channels: %s", channels)
114            self._logger.debug("device_index: %s", device_index)
115
116            self._speaker_created = True
117
118            if device_index is not None:
119                self._speaker = Speaker(
120                    rate=rate,
121                    channels=channels,
122                    last_play_delta_in_ms=playback_delta_in_ms,
123                    verbose=self._config.verbose,
124                    output_device_index=device_index,
125                    microphone=self._microphone,
126                )
127            else:
128                self._speaker = Speaker(
129                    rate=rate,
130                    channels=channels,
131                    last_play_delta_in_ms=playback_delta_in_ms,
132                    verbose=self._config.verbose,
133                    microphone=self._microphone,
134                )
135
136        # call the parent constructor
137        super().__init__(self._config, self._endpoint)
138
139    # pylint: disable=too-many-branches,too-many-statements
140    async def start(
141        self,
142        options: Optional[Union[SpeakWSOptions, Dict]] = None,
143        addons: Optional[Dict] = None,
144        headers: Optional[Dict] = None,
145        members: Optional[Dict] = None,
146        **kwargs,
147    ) -> bool:
148        """
149        Starts the WebSocket connection for text-to-speech synthesis.
150        """
151        self._logger.debug("AsyncSpeakWebSocketClient.start ENTER")
152        self._logger.info("options: %s", options)
153        self._logger.info("addons: %s", addons)
154        self._logger.info("headers: %s", headers)
155        self._logger.info("members: %s", members)
156        self._logger.info("kwargs: %s", kwargs)
157
158        if isinstance(options, SpeakWSOptions) and not options.check():
159            self._logger.error("options.check failed")
160            self._logger.debug("AsyncSpeakWebSocketClient.start LEAVE")
161            raise DeepgramError("Fatal text-to-speech options error")
162
163        self._addons = addons
164        self._headers = headers
165
166        # add "members" as members of the class
167        if members is not None:
168            self.__dict__.update(members)
169
170        # set kwargs as members of the class
171        if kwargs is not None:
172            self._kwargs = kwargs
173        else:
174            self._kwargs = {}
175
176        if isinstance(options, SpeakWSOptions):
177            self._logger.info("SpeakWSOptions switching class -> dict")
178            self._options = options.to_dict()
179        elif options is not None:
180            self._options = options
181        else:
182            self._options = {}
183
184        try:
185            # speaker substitutes the listening thread
186            if self._speaker is not None:
187                self._logger.notice("passing speaker to delegate_listening")
188                super().delegate_listening(self._speaker)
189
190            # call parent start
191            if (
192                await super().start(
193                    self._options,
194                    self._addons,
195                    self._headers,
196                    **dict(cast(Dict[Any, Any], self._kwargs)),
197                )
198                is False
199            ):
200                self._logger.error("AsyncSpeakWebSocketClient.start failed")
201                self._logger.debug("AsyncSpeakWebSocketClient.start LEAVE")
202                return False
203
204            if self._speaker is not None:
205                self._logger.notice("start delegate_listening thread")
206                self._speaker.start()
207
208            # debug the threads
209            for thread in threading.enumerate():
210                self._logger.debug("after running thread: %s", thread.name)
211            self._logger.debug("number of active threads: %s", threading.active_count())
212
213            # flush thread
214            if self._config.is_auto_flush_speak_enabled():
215                self._logger.notice("autoflush is enabled")
216                self._flush_thread = asyncio.create_task(self._flush())
217            else:
218                self._logger.notice("autoflush is disabled")
219
220            # debug the threads
221            for thread in threading.enumerate():
222                self._logger.debug("after running thread: %s", thread.name)
223            self._logger.debug("number of active threads: %s", threading.active_count())
224
225            self._logger.notice("start succeeded")
226            self._logger.debug("AsyncSpeakWebSocketClient.start LEAVE")
227            return True
228
229        except Exception as e:  # pylint: disable=broad-except
230            self._logger.error(
231                "WebSocketException in AsyncSpeakWebSocketClient.start: %s", e
232            )
233            self._logger.debug("AsyncSpeakWebSocketClient.start LEAVE")
234            if self._config.options.get("termination_exception_connect") is True:
235                raise
236            return False
237
238    # pylint: enable=too-many-branches,too-many-statements
239
240    def on(self, event: SpeakWebSocketEvents, handler: Callable) -> None:
241        """
242        Registers event handlers for specific events.
243        """
244        self._logger.info("event subscribed: %s", event)
245        if event in SpeakWebSocketEvents.__members__.values() and callable(handler):
246            self._event_handlers[event].append(handler)
247
248    # triggers the registered event handlers for a specific event
249    async def _emit(self, event: SpeakWebSocketEvents, *args, **kwargs) -> None:
250        """
251        Emits events to the registered event handlers.
252        """
253        self._logger.debug("AsyncSpeakWebSocketClient._emit ENTER")
254        self._logger.debug("callback handlers for: %s", event)
255
256        # debug the threads
257        for thread in threading.enumerate():
258            self._logger.debug("after running thread: %s", thread.name)
259        self._logger.debug("number of active threads: %s", threading.active_count())
260
261        tasks = []
262        for handler in self._event_handlers[event]:
263            task = asyncio.create_task(handler(self, *args, **kwargs))
264            tasks.append(task)
265
266        if tasks:
267            self._logger.debug("waiting for tasks to finish...")
268            await asyncio.gather(*filter(None, tasks), return_exceptions=True)
269            tasks.clear()
270
271        # debug the threads
272        for thread in threading.enumerate():
273            self._logger.debug("after running thread: %s", thread.name)
274        self._logger.debug("number of active threads: %s", threading.active_count())
275
276        self._logger.debug("AsyncSpeakWebSocketClient._emit LEAVE")
277
278    async def _process_text(self, message: Union[str, bytes]) -> None:
279        """
280        Processes messages received over the WebSocket connection.
281        """
282        self._logger.debug("AsyncSpeakWebSocketClient._process_text ENTER")
283
284        try:
285            self._logger.debug("Text data received")
286
287            if len(message) == 0:
288                self._logger.debug("message is empty")
289                self._logger.debug("AsyncSpeakWebSocketClient._process_text LEAVE")
290                return
291
292            data = json.loads(message)
293            response_type = data.get("type")
294            self._logger.debug("response_type: %s, data: %s", response_type, data)
295
296            match response_type:
297                case SpeakWebSocketEvents.Open:
298                    open_result: OpenResponse = OpenResponse.from_json(message)
299                    self._logger.verbose("OpenResponse: %s", open_result)
300                    await self._emit(
301                        SpeakWebSocketEvents(SpeakWebSocketEvents.Open),
302                        open=open_result,
303                        **dict(cast(Dict[Any, Any], self._kwargs)),
304                    )
305                case SpeakWebSocketEvents.Metadata:
306                    meta_result: MetadataResponse = MetadataResponse.from_json(message)
307                    self._logger.verbose("MetadataResponse: %s", meta_result)
308                    await self._emit(
309                        SpeakWebSocketEvents(SpeakWebSocketEvents.Metadata),
310                        metadata=meta_result,
311                        **dict(cast(Dict[Any, Any], self._kwargs)),
312                    )
313                case SpeakWebSocketEvents.Flushed:
314                    fl_result: FlushedResponse = FlushedResponse.from_json(message)
315                    self._logger.verbose("FlushedResponse: %s", fl_result)
316
317                    # auto flush
318                    if self._config.is_inspecting_speak():
319                        self._flush_count -= 1
320                        self._logger.debug(
321                            "Decrement AutoFlush count: %d",
322                            self._flush_count,
323                        )
324
325                    await self._emit(
326                        SpeakWebSocketEvents(SpeakWebSocketEvents.Flushed),
327                        flushed=fl_result,
328                        **dict(cast(Dict[Any, Any], self._kwargs)),
329                    )
330                case SpeakWebSocketEvents.Cleared:
331                    clear_result: ClearedResponse = ClearedResponse.from_json(message)
332                    self._logger.verbose("ClearedResponse: %s", clear_result)
333                    await self._emit(
334                        SpeakWebSocketEvents(SpeakWebSocketEvents.Cleared),
335                        cleared=clear_result,
336                        **dict(cast(Dict[Any, Any], self._kwargs)),
337                    )
338                case SpeakWebSocketEvents.Close:
339                    close_result: CloseResponse = CloseResponse.from_json(message)
340                    self._logger.verbose("CloseResponse: %s", close_result)
341                    await self._emit(
342                        SpeakWebSocketEvents(SpeakWebSocketEvents.Close),
343                        close=close_result,
344                        **dict(cast(Dict[Any, Any], self._kwargs)),
345                    )
346                case SpeakWebSocketEvents.Warning:
347                    war_warning: WarningResponse = WarningResponse.from_json(message)
348                    self._logger.verbose("WarningResponse: %s", war_warning)
349                    await self._emit(
350                        SpeakWebSocketEvents(SpeakWebSocketEvents.Warning),
351                        warning=war_warning,
352                        **dict(cast(Dict[Any, Any], self._kwargs)),
353                    )
354                case SpeakWebSocketEvents.Error:
355                    err_error: ErrorResponse = ErrorResponse.from_json(message)
356                    self._logger.verbose("ErrorResponse: %s", err_error)
357                    await self._emit(
358                        SpeakWebSocketEvents(SpeakWebSocketEvents.Error),
359                        error=err_error,
360                        **dict(cast(Dict[Any, Any], self._kwargs)),
361                    )
362                case _:
363                    self._logger.warning(
364                        "Unknown Message: response_type: %s, data: %s",
365                        response_type,
366                        data,
367                    )
368                    unhandled_error: UnhandledResponse = UnhandledResponse(
369                        type=SpeakWebSocketEvents(SpeakWebSocketEvents.Unhandled),
370                        raw=str(message),
371                    )
372                    await self._emit(
373                        SpeakWebSocketEvents(SpeakWebSocketEvents.Unhandled),
374                        unhandled=unhandled_error,
375                        **dict(cast(Dict[Any, Any], self._kwargs)),
376                    )
377
378            self._logger.notice("_process_text Succeeded")
379            self._logger.debug("AsyncSpeakWebSocketClient._process_text LEAVE")
380
381        except Exception as e:  # pylint: disable=broad-except
382            self._logger.error(
383                "Exception in AsyncSpeakWebSocketClient._process_text: %s", e
384            )
385            e_error: ErrorResponse = ErrorResponse(
386                "Exception in AsyncSpeakWebSocketClient._process_text",
387                f"{e}",
388                "Exception",
389            )
390            await self._emit(
391                SpeakWebSocketEvents(SpeakWebSocketEvents.Error),
392                error=e_error,
393                **dict(cast(Dict[Any, Any], self._kwargs)),
394            )
395
396            # signal exit and close
397            await super()._signal_exit()
398
399            self._logger.debug("AsyncSpeakWebSocketClient._process_text LEAVE")
400
401            if self._config.options.get("termination_exception") is True:
402                raise
403            return
404
405    # pylint: enable=too-many-return-statements,too-many-statements
406
407    async def _process_binary(self, message: bytes) -> None:
408        self._logger.debug("SpeakWebSocketClient._process_binary ENTER")
409        self._logger.debug("Binary data received")
410
411        await self._emit(
412            SpeakWebSocketEvents(SpeakWebSocketEvents.AudioData),
413            data=message,
414            **dict(cast(Dict[Any, Any], self._kwargs)),
415        )
416
417        self._logger.notice("_process_binary Succeeded")
418        self._logger.debug("SpeakWebSocketClient._process_binary LEAVE")
419
420    ## pylint: disable=too-many-return-statements
421    async def _flush(self) -> None:
422        self._logger.debug("AsyncSpeakWebSocketClient._flush ENTER")
423
424        delta_in_ms_str = self._config.options.get("auto_flush_speak_delta")
425        if delta_in_ms_str is None:
426            self._logger.error("auto_flush_speak_delta is None")
427            self._logger.debug("AsyncSpeakWebSocketClient._flush LEAVE")
428            return
429        delta_in_ms = float(delta_in_ms_str)
430
431        while True:
432            try:
433                await asyncio.sleep(HALF_SECOND)
434
435                if self._exit_event.is_set():
436                    self._logger.notice("_flush exiting gracefully")
437                    self._logger.debug("AsyncSpeakWebSocketClient._flush LEAVE")
438                    return
439
440                if self._last_datagram is None:
441                    self._logger.debug("AutoFlush last_datagram is None")
442                    continue
443
444                delta = datetime.now() - self._last_datagram
445                diff_in_ms = delta.total_seconds() * 1000
446                self._logger.debug("AutoFlush delta: %f", diff_in_ms)
447                if diff_in_ms < delta_in_ms:
448                    self._logger.debug("AutoFlush delta is less than threshold")
449                    continue
450
451                await self.flush()
452
453            except Exception as e:  # pylint: disable=broad-except
454                self._logger.error(
455                    "Exception in AsyncSpeakWebSocketClient._flush: %s", e
456                )
457                e_error: ErrorResponse = ErrorResponse(
458                    "Exception in AsyncSpeakWebSocketClient._flush",
459                    f"{e}",
460                    "Exception",
461                )
462                self._logger.error(
463                    "Exception in AsyncSpeakWebSocketClient._flush: %s", str(e)
464                )
465                await self._emit(
466                    SpeakWebSocketEvents(SpeakWebSocketEvents.Error),
467                    error=e_error,
468                    **dict(cast(Dict[Any, Any], self._kwargs)),
469                )
470
471                # signal exit and close
472                await super()._signal_exit()
473
474                self._logger.debug("AsyncSpeakWebSocketClient._flush LEAVE")
475
476                if self._config.options.get("termination_exception") is True:
477                    raise
478                return
479
480    # pylint: enable=too-many-return-statements
481
482    async def send_text(self, text_input: str) -> bool:
483        """
484        Sends text to the WebSocket connection to generate audio.
485
486        Args:
487            text_input (str): The raw text to be synthesized. This function will automatically wrap
488                the text in a JSON object of type "Speak" with the key "text".
489
490        Returns:
491            bool: True if the text was successfully sent, False otherwise.
492        """
493        return await self.send_raw(json.dumps({"type": "Speak", "text": text_input}))
494
495    async def send(self, data: Union[bytes, str]) -> bool:
496        """
497        Alias for send_text. Please see send_text for more information.
498        """
499        if isinstance(data, bytes):
500            self._logger.error("send() failed - data is bytes")
501            return False
502
503        return await self.send_text(data)
504
505    # pylint: disable=unused-argument
506    async def send_control(
507        self, msg_type: Union[SpeakWebSocketMessage, str], data: Optional[str] = ""
508    ) -> bool:
509        """
510        Sends a control message consisting of type SpeakWebSocketEvents over the WebSocket connection.
511
512        Args:
513            msg_type (SpeakWebSocketEvents): The type of control message to send.
514            (Optional) data (str): The data to send with the control message.
515
516        Returns:
517            bool: True if the control message was successfully sent, False otherwise.
518        """
519        control_msg = json.dumps({"type": msg_type})
520        return await self.send_raw(control_msg)
521
522    # pylint: enable=unused-argument
523
524    # pylint: disable=too-many-return-statements,too-many-branches,too-many-statements
525    async def send_raw(self, msg: str) -> bool:
526        """
527        Sends a raw/control message over the WebSocket connection. This message must contain a valid JSON object.
528
529        Args:
530            msg (str): The raw message to send over the WebSocket connection.
531
532        Returns:
533            bool: True if the message was successfully sent, False otherwise.
534        """
535        self._logger.spam("AsyncSpeakWebSocketClient.send_raw ENTER")
536
537        if self._config.is_inspecting_speak():
538            try:
539                _tmp_json = json.loads(msg)
540                if "type" in _tmp_json:
541                    self._logger.debug(
542                        "Inspecting Message: Sending %s", _tmp_json["type"]
543                    )
544                    match _tmp_json["type"]:
545                        case SpeakWebSocketMessage.Speak:
546                            inspect_res = await self._inspect()
547                            if not inspect_res:
548                                self._logger.error("inspect_res failed")
549                        case SpeakWebSocketMessage.Flush:
550                            self._last_datagram = None
551                            self._flush_count += 1
552                            self._logger.debug(
553                                "Increment Flush count: %d", self._flush_count
554                            )
555            except Exception as e:  # pylint: disable=broad-except
556                self._logger.error("send_raw() failed - Exception: %s", str(e))
557
558        try:
559            if await super().send(msg) is False:
560                self._logger.error("send_raw() failed")
561                self._logger.spam("AsyncSpeakWebSocketClient.send_raw LEAVE")
562                return False
563            self._logger.spam("send_raw() succeeded")
564            self._logger.spam("AsyncSpeakWebSocketClient.send_raw LEAVE")
565            return True
566        except Exception as e:  # pylint: disable=broad-except
567            self._logger.error("send_raw() failed - Exception: %s", str(e))
568            self._logger.spam("AsyncSpeakWebSocketClient.send_raw LEAVE")
569            if self._config.options.get("termination_exception_send") is True:
570                raise
571            return False
572
573    # pylint: enable=too-many-return-statements,too-many-branches
574
575    async def flush(self) -> bool:
576        """
577        Flushes the current buffer and returns generated audio
578        """
579        self._logger.spam("AsyncSpeakWebSocketClient.flush ENTER")
580
581        self._logger.notice("Sending Flush...")
582        ret = await self.send_control(SpeakWebSocketMessage.Flush)
583
584        if not ret:
585            self._logger.error("flush failed")
586            self._logger.spam("AsyncSpeakWebSocketClient.flush LEAVE")
587            return False
588
589        self._logger.notice("flush succeeded")
590        self._logger.spam("AsyncSpeakWebSocketClient.flush LEAVE")
591
592        return True
593
594    async def clear(self) -> bool:
595        """
596        Clears the current buffer on the server
597        """
598        self._logger.spam("AsyncSpeakWebSocketClient.clear ENTER")
599
600        self._logger.notice("Sending Clear...")
601        ret = await self.send_control(SpeakWebSocketMessage.Clear)
602
603        if not ret:
604            self._logger.error("clear failed")
605            self._logger.spam("AsyncSpeakWebSocketClient.clear LEAVE")
606            return False
607
608        self._logger.notice("clear succeeded")
609        self._logger.spam("AsyncSpeakWebSocketClient.clear LEAVE")
610
611        return True
612
613    async def wait_for_complete(self):
614        """
615        This method will block until the speak is done playing sound.
616        """
617        self._logger.spam("AsyncSpeakWebSocketClient.wait_for_complete ENTER")
618
619        if self._speaker is None:
620            self._logger.error("speaker is None. Return immediately")
621            return
622
623        loop = asyncio.get_event_loop()
624        await loop.run_in_executor(None, self._speaker.wait_for_complete)
625        self._logger.notice("wait_for_complete succeeded")
626        self._logger.spam("AsyncSpeakWebSocketClient.wait_for_complete LEAVE")
627
628    async def _close_message(self) -> bool:
629        return await self.send_control(SpeakWebSocketMessage.Close)
630
631    async def finish(self) -> bool:
632        """
633        Closes the WebSocket connection gracefully.
634        """
635        self._logger.debug("AsyncSpeakWebSocketClient.finish ENTER")
636
637        # stop the threads
638        self._logger.verbose("cancelling tasks...")
639        try:
640            # call parent finish
641            if await super().finish() is False:
642                self._logger.error("AsyncListenWebSocketClient.finish failed")
643
644            if self._speaker is not None and self._speaker_created:
645                self._speaker.finish()
646                self._speaker_created = False
647
648            # Before cancelling, check if the tasks were created
649            # debug the threads
650            for thread in threading.enumerate():
651                self._logger.debug("before running thread: %s", thread.name)
652            self._logger.debug("number of active threads: %s", threading.active_count())
653
654            tasks = []
655
656            if self._speaker is not None:
657                self._logger.notice("stopping speaker...")
658                self._speaker.finish()
659                self._speaker = None
660                self._logger.notice("speaker stopped")
661
662            if self._flush_thread is not None:
663                self._logger.notice("stopping _flush_thread...")
664                self._flush_thread.cancel()
665                tasks.append(self._flush_thread)
666                self._logger.notice("_flush_thread cancelled")
667
668            # Use asyncio.gather to wait for tasks to be cancelled
669            # Prevent indefinite waiting by setting a timeout
670            await asyncio.wait_for(asyncio.gather(*tasks), timeout=10)
671            self._logger.notice("threads joined")
672
673            # debug the threads
674            for thread in threading.enumerate():
675                self._logger.debug("after running thread: %s", thread.name)
676            self._logger.debug("number of active threads: %s", threading.active_count())
677
678            self._logger.notice("finish succeeded")
679            self._logger.spam("AsyncSpeakWebSocketClient.finish LEAVE")
680            return True
681
682        except asyncio.CancelledError:
683            self._logger.debug("tasks cancelled")
684            self._logger.debug("AsyncSpeakWebSocketClient.finish LEAVE")
685            return False
686
687        except asyncio.TimeoutError as e:
688            self._logger.error("tasks cancellation timed out: %s", e)
689            self._logger.debug("AsyncSpeakWebSocketClient.finish LEAVE")
690            return False
691
692    async def _inspect(self) -> bool:
693        # auto flush_inspect is generically used to track any messages you might want to snoop on
694        # place additional logic here to inspect messages of interest
695
696        # for auto flush functionality
697        # set the last datagram
698        self._last_datagram = datetime.now()
699        self._logger.debug(
700            "AutoFlush last received: %s",
701            str(self._last_datagram),
702        )
703
704        return True

Client for interacting with Deepgram's text-to-speech services over WebSockets.

This class provides methods to establish a WebSocket connection for TTS synthesis and handle real-time TTS synthesis events.

Args: config (DeepgramClientOptions): all the options for the client.

AsyncSpeakWSClient( config: deepgram.options.DeepgramClientOptions, microphone: Optional[deepgram.audio.microphone.microphone.Microphone] = None)
 71    def __init__(
 72        self, config: DeepgramClientOptions, microphone: Optional[Microphone] = None
 73    ):
 74        if config is None:
 75            raise DeepgramError("Config is required")
 76        self._logger = verboselogs.VerboseLogger(__name__)
 77        self._logger.addHandler(logging.StreamHandler())
 78        self._logger.setLevel(config.verbose)
 79
 80        self._config = config
 81        self._endpoint = "v1/speak"
 82
 83        self._flush_thread = None
 84
 85        # auto flush
 86        self._last_datagram = None
 87        self._flush_count = 0
 88
 89        # microphone
 90        self._microphone = microphone
 91
 92        # init handlers
 93        self._event_handlers = {
 94            event: [] for event in SpeakWebSocketEvents.__members__.values()
 95        }
 96
 97        if self._config.options.get("speaker_playback") == "true":
 98            self._logger.info("speaker_playback is enabled")
 99            rate = self._config.options.get("speaker_playback_rate")
100            if rate is None:
101                rate = RATE
102            channels = self._config.options.get("speaker_playback_channels")
103            if channels is None:
104                channels = CHANNELS
105            playback_delta_in_ms = self._config.options.get(
106                "speaker_playback_delta_in_ms"
107            )
108            if playback_delta_in_ms is None:
109                playback_delta_in_ms = PLAYBACK_DELTA
110            device_index = self._config.options.get("speaker_playback_device_index")
111
112            self._logger.debug("rate: %s", rate)
113            self._logger.debug("channels: %s", channels)
114            self._logger.debug("device_index: %s", device_index)
115
116            self._speaker_created = True
117
118            if device_index is not None:
119                self._speaker = Speaker(
120                    rate=rate,
121                    channels=channels,
122                    last_play_delta_in_ms=playback_delta_in_ms,
123                    verbose=self._config.verbose,
124                    output_device_index=device_index,
125                    microphone=self._microphone,
126                )
127            else:
128                self._speaker = Speaker(
129                    rate=rate,
130                    channels=channels,
131                    last_play_delta_in_ms=playback_delta_in_ms,
132                    verbose=self._config.verbose,
133                    microphone=self._microphone,
134                )
135
136        # call the parent constructor
137        super().__init__(self._config, self._endpoint)
async def start( self, options: Union[SpeakWSOptions, Dict, NoneType] = None, addons: Optional[Dict] = None, headers: Optional[Dict] = None, members: Optional[Dict] = None, **kwargs) -> bool:
140    async def start(
141        self,
142        options: Optional[Union[SpeakWSOptions, Dict]] = None,
143        addons: Optional[Dict] = None,
144        headers: Optional[Dict] = None,
145        members: Optional[Dict] = None,
146        **kwargs,
147    ) -> bool:
148        """
149        Starts the WebSocket connection for text-to-speech synthesis.
150        """
151        self._logger.debug("AsyncSpeakWebSocketClient.start ENTER")
152        self._logger.info("options: %s", options)
153        self._logger.info("addons: %s", addons)
154        self._logger.info("headers: %s", headers)
155        self._logger.info("members: %s", members)
156        self._logger.info("kwargs: %s", kwargs)
157
158        if isinstance(options, SpeakWSOptions) and not options.check():
159            self._logger.error("options.check failed")
160            self._logger.debug("AsyncSpeakWebSocketClient.start LEAVE")
161            raise DeepgramError("Fatal text-to-speech options error")
162
163        self._addons = addons
164        self._headers = headers
165
166        # add "members" as members of the class
167        if members is not None:
168            self.__dict__.update(members)
169
170        # set kwargs as members of the class
171        if kwargs is not None:
172            self._kwargs = kwargs
173        else:
174            self._kwargs = {}
175
176        if isinstance(options, SpeakWSOptions):
177            self._logger.info("SpeakWSOptions switching class -> dict")
178            self._options = options.to_dict()
179        elif options is not None:
180            self._options = options
181        else:
182            self._options = {}
183
184        try:
185            # speaker substitutes the listening thread
186            if self._speaker is not None:
187                self._logger.notice("passing speaker to delegate_listening")
188                super().delegate_listening(self._speaker)
189
190            # call parent start
191            if (
192                await super().start(
193                    self._options,
194                    self._addons,
195                    self._headers,
196                    **dict(cast(Dict[Any, Any], self._kwargs)),
197                )
198                is False
199            ):
200                self._logger.error("AsyncSpeakWebSocketClient.start failed")
201                self._logger.debug("AsyncSpeakWebSocketClient.start LEAVE")
202                return False
203
204            if self._speaker is not None:
205                self._logger.notice("start delegate_listening thread")
206                self._speaker.start()
207
208            # debug the threads
209            for thread in threading.enumerate():
210                self._logger.debug("after running thread: %s", thread.name)
211            self._logger.debug("number of active threads: %s", threading.active_count())
212
213            # flush thread
214            if self._config.is_auto_flush_speak_enabled():
215                self._logger.notice("autoflush is enabled")
216                self._flush_thread = asyncio.create_task(self._flush())
217            else:
218                self._logger.notice("autoflush is disabled")
219
220            # debug the threads
221            for thread in threading.enumerate():
222                self._logger.debug("after running thread: %s", thread.name)
223            self._logger.debug("number of active threads: %s", threading.active_count())
224
225            self._logger.notice("start succeeded")
226            self._logger.debug("AsyncSpeakWebSocketClient.start LEAVE")
227            return True
228
229        except Exception as e:  # pylint: disable=broad-except
230            self._logger.error(
231                "WebSocketException in AsyncSpeakWebSocketClient.start: %s", e
232            )
233            self._logger.debug("AsyncSpeakWebSocketClient.start LEAVE")
234            if self._config.options.get("termination_exception_connect") is True:
235                raise
236            return False

Starts the WebSocket connection for text-to-speech synthesis.

def on( self, event: deepgram.clients.speak.enums.SpeakWebSocketEvents, handler: Callable) -> None:
240    def on(self, event: SpeakWebSocketEvents, handler: Callable) -> None:
241        """
242        Registers event handlers for specific events.
243        """
244        self._logger.info("event subscribed: %s", event)
245        if event in SpeakWebSocketEvents.__members__.values() and callable(handler):
246            self._event_handlers[event].append(handler)

Registers event handlers for specific events.

async def send_text(self, text_input: str) -> bool:
482    async def send_text(self, text_input: str) -> bool:
483        """
484        Sends text to the WebSocket connection to generate audio.
485
486        Args:
487            text_input (str): The raw text to be synthesized. This function will automatically wrap
488                the text in a JSON object of type "Speak" with the key "text".
489
490        Returns:
491            bool: True if the text was successfully sent, False otherwise.
492        """
493        return await self.send_raw(json.dumps({"type": "Speak", "text": text_input}))

Sends text to the WebSocket connection to generate audio.

Args: text_input (str): The raw text to be synthesized. This function will automatically wrap the text in a JSON object of type "Speak" with the key "text".

Returns: bool: True if the text was successfully sent, False otherwise.

async def send(self, data: Union[bytes, str]) -> bool:
495    async def send(self, data: Union[bytes, str]) -> bool:
496        """
497        Alias for send_text. Please see send_text for more information.
498        """
499        if isinstance(data, bytes):
500            self._logger.error("send() failed - data is bytes")
501            return False
502
503        return await self.send_text(data)

Alias for send_text. Please see send_text for more information.

async def send_control( self, msg_type: Union[deepgram.clients.speak.enums.SpeakWebSocketMessage, str], data: Optional[str] = '') -> bool:
506    async def send_control(
507        self, msg_type: Union[SpeakWebSocketMessage, str], data: Optional[str] = ""
508    ) -> bool:
509        """
510        Sends a control message consisting of type SpeakWebSocketEvents over the WebSocket connection.
511
512        Args:
513            msg_type (SpeakWebSocketEvents): The type of control message to send.
514            (Optional) data (str): The data to send with the control message.
515
516        Returns:
517            bool: True if the control message was successfully sent, False otherwise.
518        """
519        control_msg = json.dumps({"type": msg_type})
520        return await self.send_raw(control_msg)

Sends a control message of type SpeakWebSocketMessage over the WebSocket connection.

Args: msg_type (SpeakWebSocketMessage or str): The type of control message to send. (Optional) data (str): The data to send with the control message.

Returns: bool: True if the control message was successfully sent, False otherwise.

async def send_raw(self, msg: str) -> bool:
525    async def send_raw(self, msg: str) -> bool:
526        """
527        Sends a raw/control message over the WebSocket connection. This message must contain a valid JSON object.
528
529        Args:
530            msg (str): The raw message to send over the WebSocket connection.
531
532        Returns:
533            bool: True if the message was successfully sent, False otherwise.
534        """
535        self._logger.spam("AsyncSpeakWebSocketClient.send_raw ENTER")
536
537        if self._config.is_inspecting_speak():
538            try:
539                _tmp_json = json.loads(msg)
540                if "type" in _tmp_json:
541                    self._logger.debug(
542                        "Inspecting Message: Sending %s", _tmp_json["type"]
543                    )
544                    match _tmp_json["type"]:
545                        case SpeakWebSocketMessage.Speak:
546                            inspect_res = await self._inspect()
547                            if not inspect_res:
548                                self._logger.error("inspect_res failed")
549                        case SpeakWebSocketMessage.Flush:
550                            self._last_datagram = None
551                            self._flush_count += 1
552                            self._logger.debug(
553                                "Increment Flush count: %d", self._flush_count
554                            )
555            except Exception as e:  # pylint: disable=broad-except
556                self._logger.error("send_raw() failed - Exception: %s", str(e))
557
558        try:
559            if await super().send(msg) is False:
560                self._logger.error("send_raw() failed")
561                self._logger.spam("AsyncSpeakWebSocketClient.send_raw LEAVE")
562                return False
563            self._logger.spam("send_raw() succeeded")
564            self._logger.spam("AsyncSpeakWebSocketClient.send_raw LEAVE")
565            return True
566        except Exception as e:  # pylint: disable=broad-except
567            self._logger.error("send_raw() failed - Exception: %s", str(e))
568            self._logger.spam("AsyncSpeakWebSocketClient.send_raw LEAVE")
569            if self._config.options.get("termination_exception_send") is True:
570                raise
571            return False

Sends a raw/control message over the WebSocket connection. This message must contain a valid JSON object.

Args: msg (str): The raw message to send over the WebSocket connection.

Returns: bool: True if the message was successfully sent, False otherwise.

async def flush(self) -> bool:
575    async def flush(self) -> bool:
576        """
577        Flushes the current buffer and returns generated audio
578        """
579        self._logger.spam("AsyncSpeakWebSocketClient.flush ENTER")
580
581        self._logger.notice("Sending Flush...")
582        ret = await self.send_control(SpeakWebSocketMessage.Flush)
583
584        if not ret:
585            self._logger.error("flush failed")
586            self._logger.spam("AsyncSpeakWebSocketClient.flush LEAVE")
587            return False
588
589        self._logger.notice("flush succeeded")
590        self._logger.spam("AsyncSpeakWebSocketClient.flush LEAVE")
591
592        return True

Flushes the current buffer and returns generated audio

async def clear(self) -> bool:
594    async def clear(self) -> bool:
595        """
596        Clears the current buffer on the server
597        """
598        self._logger.spam("AsyncSpeakWebSocketClient.clear ENTER")
599
600        self._logger.notice("Sending Clear...")
601        ret = await self.send_control(SpeakWebSocketMessage.Clear)
602
603        if not ret:
604            self._logger.error("clear failed")
605            self._logger.spam("AsyncSpeakWebSocketClient.clear LEAVE")
606            return False
607
608        self._logger.notice("clear succeeded")
609        self._logger.spam("AsyncSpeakWebSocketClient.clear LEAVE")
610
611        return True

Clears the current buffer on the server

async def wait_for_complete(self):
613    async def wait_for_complete(self):
614        """
615        This method will block until the speak is done playing sound.
616        """
617        self._logger.spam("AsyncSpeakWebSocketClient.wait_for_complete ENTER")
618
619        if self._speaker is None:
620            self._logger.error("speaker is None. Return immediately")
621            return
622
623        loop = asyncio.get_event_loop()
624        await loop.run_in_executor(None, self._speaker.wait_for_complete)
625        self._logger.notice("wait_for_complete succeeded")
626        self._logger.spam("AsyncSpeakWebSocketClient.wait_for_complete LEAVE")

This method blocks until the speaker has finished playing audio.

async def finish(self) -> bool:
631    async def finish(self) -> bool:
632        """
633        Closes the WebSocket connection gracefully.
634        """
635        self._logger.debug("AsyncSpeakWebSocketClient.finish ENTER")
636
637        # stop the threads
638        self._logger.verbose("cancelling tasks...")
639        try:
640            # call parent finish
641            if await super().finish() is False:
642                self._logger.error("AsyncListenWebSocketClient.finish failed")
643
644            if self._speaker is not None and self._speaker_created:
645                self._speaker.finish()
646                self._speaker_created = False
647
648            # Before cancelling, check if the tasks were created
649            # debug the threads
650            for thread in threading.enumerate():
651                self._logger.debug("before running thread: %s", thread.name)
652            self._logger.debug("number of active threads: %s", threading.active_count())
653
654            tasks = []
655
656            if self._speaker is not None:
657                self._logger.notice("stopping speaker...")
658                self._speaker.finish()
659                self._speaker = None
660                self._logger.notice("speaker stopped")
661
662            if self._flush_thread is not None:
663                self._logger.notice("stopping _flush_thread...")
664                self._flush_thread.cancel()
665                tasks.append(self._flush_thread)
666                self._logger.notice("_flush_thread cancelled")
667
668            # Use asyncio.gather to wait for tasks to be cancelled
669            # Prevent indefinite waiting by setting a timeout
670            await asyncio.wait_for(asyncio.gather(*tasks), timeout=10)
671            self._logger.notice("threads joined")
672
673            # debug the threads
674            for thread in threading.enumerate():
675                self._logger.debug("after running thread: %s", thread.name)
676            self._logger.debug("number of active threads: %s", threading.active_count())
677
678            self._logger.notice("finish succeeded")
679            self._logger.spam("AsyncSpeakWebSocketClient.finish LEAVE")
680            return True
681
682        except asyncio.CancelledError:
683            self._logger.debug("tasks cancelled")
684            self._logger.debug("AsyncSpeakWebSocketClient.finish LEAVE")
685            return False
686
687        except asyncio.TimeoutError as e:
688            self._logger.error("tasks cancellation timed out: %s", e)
689            self._logger.debug("AsyncSpeakWebSocketClient.finish LEAVE")
690            return False

Closes the WebSocket connection gracefully.

SpeakWebSocketClient = <class 'SpeakWSClient'>
AsyncSpeakWebSocketClient = <class 'AsyncSpeakWSClient'>