Merge pull request #5 from speechmatics/v0.0.5

dumitrugutu · web-flow · commit ffd9f8dc684e · 2024-11-13T09:14:22.000Z
Add option to change default assistant from cli and other small improvements
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ "3.10", "3.11" ]
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
 
     steps:
       - uses: actions/checkout@v2
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,20 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
+## [0.0.5] - 2024-11-13
+
+### Added
+
+- Added the option to change the assistant from the CLI
+- Added the option to load conversation_config from a config file
+- Added client handling of unexpected messages from the server
+
+### Changed
+
+- Allow `websockets` versions from `10.0` up to and including `13.1` to avoid the `extra_headers` compatibility issue
+  with `websockets` `14.0`
+- Improved documentation for Interaction class
+
 ## [0.0.4] - 2024-11-12
 
 ### Added
@@ -13,8 +27,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - `ResponseInterrupted`: Indicates an interruption in the TTS audio stream from the server.
   The message contains the textual content up to the point where the utterance was stopped.
 - `ResponseCompleted`: Indicates the completion of TTS audio transmission from the server.
-- `ConversationEnding`: Indicates the session will continue in one-sided mode during TTS playback of the final words.
   The message includes the textual content of the utterance just spoken.
+- `ConversationEnding`: Indicates the session will continue in one-sided mode during TTS playback of the final words.
 - `AddAudio`: Implicit name for all inbound binary messages.
   The client confirms receipt by sending an `ServerMessageType.AudioReceived` message.
 - `AudioReceived`: Response to `ServerMessageType.AddAudio`, indicating that audio has been added successfully.
diff --git a/README.md b/README.md
@@ -33,6 +33,39 @@ python setup.py install --user
  speechmatics-flow --url $URL --auth-token $TOKEN --ssl-mode insecure
  ```
 
+### Change Assistant (Amelia → Humphrey)
+
+To set the assistant to *Humphrey* instead of *Amelia*, run this command:
+
+```bash
+speechmatics-flow --url $URL --auth-token $TOKEN --ssl-mode insecure --assistant humphrey
+```
+
+### Load conversation_config from a config file
+
+Instead of manually setting up conversation parameters, you can load them from a configuration file.
+
+Create a JSON file with the template details (for example, `conversation_config.json`) and run the Flow client
+with the `--config-file` option:
+
+```json
+{
+  "template_id": "flow-service-assistant-humphrey",
+  "template_variables": {
+    "persona": "You are an English butler named Humphrey.",
+    "style": "Be charming but unpredictable.",
+    "context": "You are taking a customer's order at a fast food restaurant."
+  }
+}
+```
+
+ ```bash
+ speechmatics-flow --url $URL --auth-token $TOKEN --ssl-mode insecure --config-file conversation_config.json
+ ```
+
+> **Hint**: Why limit Humphrey? Try changing the template_variables to see what happens if he’s not a butler but
+> perhaps... a pirate, a celebrity chef, or a royal advisor. We won’t stop you. 🏴‍☠️
+
 ## Support
 
 If you have any issues with this library or encounter any bugs then please get in touch with us at
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.0.4
+0.0.5
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 httpx==0.27.1
 pyaudio==0.2.14
-websockets>=10
+setuptools
+websockets>=10,<=13.1
diff --git a/speechmatics_flow/cli.py b/speechmatics_flow/cli.py
@@ -10,7 +10,6 @@
 import sys
 from dataclasses import dataclass
 from socket import gaierror
-from typing import Any, Dict
 
 import httpx
 from websockets.exceptions import WebSocketException
@@ -25,6 +24,7 @@
     Interaction,
     ConnectionSettings,
 )
+from speechmatics_flow.templates import TemplateOptions
 
 LOGGER = logging.getLogger(__name__)
 
@@ -99,17 +99,22 @@ def get_conversation_config(
     :param args: Keyword arguments probably from the command line.
     :type args: Dict
 
-    :return: Settings for the ASR engine.
+    :return: Settings for the Flow engine.
     :rtype: models.ConversationConfig
     """
 
-    config: Dict[str, Any] = {}
+    config = {}
+    # First, get configuration from the config file if provided.
     if args.get("config_file"):
         with open(args["config_file"], encoding="utf-8") as config_file:
             config = json.load(config_file)
 
     if config.get("conversation_config"):
-        config.update(config.pop("conversation_config"))
+        config = config["conversation_config"]
+
+    # Command line arguments override values from config file
+    if assistant := args.get("assistant"):
+        config["template_id"] = TemplateOptions.get(assistant)
 
     return ConversationConfig(**config)
 
diff --git a/speechmatics_flow/cli_parser.py b/speechmatics_flow/cli_parser.py
@@ -5,6 +5,8 @@
 import argparse
 import logging
 
+from speechmatics_flow.templates import TemplateOptions
+
 LOGGER = logging.getLogger(__name__)
 
 
@@ -110,6 +112,21 @@ def get_arg_parser():
             "plaintext messages."
         ),
     )
+    parser.add_argument(
+        "--config-file",
+        dest="config_file",
+        type=str,
+        default=None,
+        help="Read the conversation config from a file."
+        " If you provide this, all other config options work as overrides.",
+    )
+    parser.add_argument(
+        "--assistant",
+        default=None,
+        type=str,
+        choices=[k for k in TemplateOptions.keys()],
+        help="Choose your assistant.",
+    )
 
     return parser
 
diff --git a/speechmatics_flow/client.py b/speechmatics_flow/client.py
@@ -198,6 +198,10 @@ async def _consumer(self, message, from_cli: False):
         if message_type is None:
             return
 
+        if message_type not in self.event_handlers:
+            LOGGER.warning(f"Unknown message type {message_type!r}")
+            return
+
         for handler in self.event_handlers[message_type]:
             try:
                 handler(copy.deepcopy(message))
diff --git a/speechmatics_flow/models.py b/speechmatics_flow/models.py
@@ -7,7 +7,9 @@
 import ssl
 from dataclasses import asdict, dataclass, field
 from enum import Enum
-from typing import Callable, Dict, Optional, Literal
+from typing import Callable, Dict, Optional
+
+from speechmatics_flow.templates import TemplateID
 
 
 @dataclass
@@ -62,9 +64,7 @@ class ConnectionSettings:
 class ConversationConfig:
     """Defines configuration parameters for conversation requests."""
 
-    template_id: Literal[
-        "default", "flow-service-assistant-amelia", "flow-service-assistant-humphrey"
-    ] = "default"
+    template_id: TemplateID = "default"
     """Name of a predefined template."""
 
     template_variables: Optional[Dict[str, str]] = None
@@ -160,7 +160,30 @@ class ServerMessageType(str, Enum):
 
 @dataclass
 class Interaction:
-    """Defines various interactions between client and server."""
+    """
+    Defines a single interaction between a client and a server, typically
+    used to handle non-continuous streams such as an audio file. This class
+    enables the server to respond after the stream has finished or based
+    on the specified callback function, allowing flexibility in connection
+    handling after streaming.
+
+    Attributes:
+        stream (io.BufferedReader): The audio stream to be sent to the server.
+        callback (Optional[Callable]): An optional function to be executed when
+            the audio stream ends. This can be used to delay connection closure
+            or perform additional actions upon stream completion.
+
+    Examples:
+        Keep the connection open for an additional 2 seconds after streaming
+        an audio file, allowing time for the server to respond.
+
+        ```python
+        Interaction(audio_stream, callback=lambda x: time.sleep(2))
+        ```
+    """
 
     stream: io.BufferedReader
+    """The audio stream to be sent to the server."""
+
     callback: Optional[Callable] = None
+    """An optional function to be executed when the audio stream ends."""
diff --git a/speechmatics_flow/templates.py b/speechmatics_flow/templates.py
@@ -0,0 +1,20 @@
+"""Pre-configured Template IDs to start a Flow conversation."""
+
+from enum import Enum
+from typing import Literal
+
+
+class Template(Enum):
+    default = "default"
+    amelia = "flow-service-assistant-amelia"
+    humphrey = "flow-service-assistant-humphrey"
+
+
+TemplateID = Literal[
+    Template.default.value,
+    Template.amelia.value,
+    Template.humphrey.value,
+]
+
+# Map user-friendly name to full TemplateID
+TemplateOptions = {t.name: t.value for t in Template}
diff --git a/tests/data/conversation_config.json b/tests/data/conversation_config.json
@@ -0,0 +1,8 @@
+{
+  "template_id": "flow-service-assistant-humphrey",
+  "template_variables": {
+    "persona": "You are an English butler named Humphrey.",
+    "style": "Be charming but unpredictable.",
+    "context": "You are taking a customer's order at a fast food restaurant."
+  }
+}
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -0,0 +1,54 @@
+from pytest import mark, param
+
+from speechmatics_flow import cli
+from speechmatics_flow.templates import Template
+
+TEMPLATE_VARS = {
+    "persona": "You are an English butler named Humphrey.",
+    "style": "Be charming but unpredictable.",
+    "context": "You are taking a customer's order at a fast food restaurant.",
+}
+
+
+@mark.parametrize(
+    "args, exp_values",
+    [
+        param(
+            [],
+            {"template_id": Template.default.value},
+            id="default assistant",
+        ),
+        param(
+            ["--assistant=amelia"],
+            {"template_id": Template.amelia.value},
+            id="assistant amelia",
+        ),
+        param(
+            ["--assistant=humphrey"],
+            {"template_id": Template.humphrey.value},
+            id="assistant humphrey",
+        ),
+        param(
+            ["--config-file=tests/data/conversation_config.json"],
+            {
+                "template_id": "flow-service-assistant-humphrey",
+                "template_variables": TEMPLATE_VARS,
+            },
+            id="params from config file",
+        ),
+        param(
+            ["--assistant=amelia", "--config-file=tests/data/conversation_config.json"],
+            {
+                "template_id": "flow-service-assistant-amelia",
+                "template_variables": TEMPLATE_VARS,
+            },
+            id="params from config file with assistant override from cli",
+        ),
+    ],
+)
+def test_get_conversation_config(args, exp_values):
+    test_values = vars(cli.parse_args(args=args))
+    config = cli.get_conversation_config(test_values)
+    assert config.asdict() == exp_values, "Expecting {} but got {}".format(
+        exp_values, config.asdict()
+    )