[services]: add text_to_speak

X-FRI · Jun 26, 2024 · 82bd4e9 · 82bd4e9
1 parent 6d89e1d
commit 82bd4e9
Show file tree

Hide file tree

Showing 7 changed files with 130 additions and 41 deletions.
diff --git a/autumnbot/autumnbot/main.py b/autumnbot/autumnbot/main.py
@@ -26,6 +26,8 @@
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from typing import Text
+from services.text_to_speak.text_to_speak import TextToSpeak
 from services.ollama_client.ollama_client import OllamaClient
 from services.speak_to_text.speak_to_text import SpeakToText
 from services.camera_saver.camera_saver import CameraSaver
@@ -36,14 +38,15 @@
 
 # These are all services of AutumnBot
 SERVICES: set[Type[Service]] = {
-    # CameraSaver,
+    CameraSaver,
     SpeakToText,
     VoiceRecorder,
     OllamaClient,
+    TextToSpeak,
 }
 
 
-def camera_saver_example(service_manager: ServiceManager) -> None:
+def example1(service_manager: ServiceManager) -> None:
     import cv2
 
     camera_saver = service_manager.get_started_service(CameraSaver)
@@ -55,38 +58,20 @@ def camera_saver_example(service_manager: ServiceManager) -> None:
             cv2.waitKey()
 
 
-def voice_recorder_example(service_manager: ServiceManager) -> None:
-    import os
-
-    voice_recorder = service_manager.get_started_service(VoiceRecorder)
-    speak_to_text = service_manager.get_started_service(SpeakToText)
-
-    if voice_recorder is not None and speak_to_text is not None:
-        pre_voice = ""
-        while True:
-            voice = voice_recorder.ask({})
-            if voice == pre_voice:
-                continue
-            else:
-                pre_voice = voice
-                text = cast(str, speak_to_text.ask(voice))
-                if "退" in text and "出" in text:
-                    break
-                os.remove(cast(str, voice))
-
-
-def ollama_client_example(service_manager: ServiceManager) -> None:
+def example2(service_manager: ServiceManager) -> None:
     from services.ollama_client.ollama_message import OllamaMessage
     import os
 
     ollama_client = service_manager.get_started_service(OllamaClient)
     voice_recorder = service_manager.get_started_service(VoiceRecorder)
     speak_to_text = service_manager.get_started_service(SpeakToText)
+    text_to_speak = service_manager.get_started_service(TextToSpeak)
 
     if (
         ollama_client is not None
         and voice_recorder is not None
         and speak_to_text is not None
+        and text_to_speak is not None
     ):
         pre_voice = ""
         while True:
@@ -97,23 +82,26 @@ def ollama_client_example(service_manager: ServiceManager) -> None:
                 message = ollama_client.ask(
                     OllamaMessage(content=cast(str, speak_to_text.ask(voice)))
                 )
-                print(message)
+                if message is not None:
+                    message = cast(dict[str, Any], message)["content"]
+                    text_to_speak.ask(message)
+
+                    if "再" in message and "见" in message:
+                        break
+
                 os.remove(cast(str, voice))
 
 
 # An example of managing and using all services through ServiceManager
-def example() -> None:
+def examples() -> None:
     service_manager = ServiceManager(SERVICES)
     service_manager.start_all_services()
 
     try:
-        # camera_saver_example(service_manager)
-        # speak_to_text_example(service_manager)
-        # voice_recorder_example(service_manager)
-        ollama_client_example(service_manager)
+        example2(service_manager)
     finally:
         service_manager.stop_all_services()
 
 
 def main() -> None:
-    example()
+    examples()
diff --git a/autumnbot/pyproject.toml b/autumnbot/pyproject.toml
@@ -16,6 +16,7 @@ vosk = "^0.3.45"
 
 ollama = "^0.2.1"
 alive-progress = "^3.1.5"
+pyttsx3 = "^2.90"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

diff --git a/autumnbot/services/ollama_client/ollama_client.py b/autumnbot/services/ollama_client/ollama_client.py
@@ -33,21 +33,27 @@
 import ollama
 import alive_progress
 
-
 class OllamaClient(service.Service):
 
     CLASS_NAME: str = "Ollama"
 
+    # Ollama message context (conversation history). In chat mode, some models can use this context to improve their output.
     __context: list[ollama.Message]
 
     def __init__(self) -> None:
         super().__init__()
         self.info("initialize")
         self.__context = list()
 
-    def on_receive(
-        self, message: OllamaMessage
-    ) -> Optional[Union[Mapping[str, Any], Iterator[Mapping[str, Any]]]]:
+    def on_start(self) -> None:
+        super().on_start()
+        self.info("start")
+
+    def on_stop(self) -> None:
+        super().on_stop()
+        self.info("stop")
+
+    def on_receive(self, message: OllamaMessage) -> Optional[dict[str, Any]]:
         self.info("request message")
 
         with alive_progress.alive_bar(3) as bar:

diff --git a/autumnbot/services/ollama_client/ollama_message.py b/autumnbot/services/ollama_client/ollama_message.py
@@ -32,13 +32,18 @@
 
 import ollama
 
+
+# Describes an API request to Ollama
 class OllamaMessage(utils.logging.Logging):
 
     MODULE_NAME: str = "service"
     CLASS_NAME: str = "OllamaMessage"
-
+
+    # Request type (chat or generate)
     typ: Optional[str]
     content: str
+
+    # Name of the model to call
     model: str
 
     def __init__(self, content: str, typ: str = "chat", model: str = "qwen") -> None:
@@ -50,6 +55,7 @@ def __init__(self, content: str, typ: str = "chat", model: str = "qwen") -> None
         self.model = model
 
     @staticmethod
+    # Check if request type is valid
     def __check_typ(typ: str) -> Optional[str]:
         match typ:
             case "chat":
@@ -58,9 +64,12 @@ def __check_typ(typ: str) -> Optional[str]:
     def __to_request_message(self) -> ollama.Message:
         return {"role": "user", "content": self.content}
 
+    # Return the request function matching self.typ, or None if self.typ is None
     def to_request(
         self,
-    ) -> Optional[Callable[[Any], Union[Mapping[str, Any], Iterator[Mapping[str, Any]]]]]:
+    ) -> Optional[
+        Callable[[Any], Union[Mapping[str, Any], Iterator[Mapping[str, Any]]]]
+    ]:
         if self.typ is None:
             return None
 
@@ -73,10 +82,9 @@ def to_request(
     def __to_chat_request(
         self,
     ) -> Callable[
-        [Sequence[ollama.Message]],
+        [list[ollama.Message]],
         Union[Mapping[str, Any], Iterator[Mapping[str, Any]]],
     ]:
         return lambda history: ollama.chat(
             model=self.model, messages=[self.__to_request_message()]
         )
-
diff --git a/autumnbot/services/service.py b/autumnbot/services/service.py
@@ -45,13 +45,10 @@ def on_failure(
         return super().on_failure(exception_type, exception_value, traceback)
 
     def on_receive(self, message: Any) -> Any:
-        self.info("receive")
         return super().on_receive(message)
 
     def on_start(self) -> None:
-        self.info("start")
         return super().on_start()
 
     def on_stop(self) -> None:
-        self.info("stop")
         return super().on_stop()
diff --git a/autumnbot/services/text_to_speak/__init__.py b/autumnbot/services/text_to_speak/__init__.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2024 Muqiu Han
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright notice,
+#       this list of conditions and the following disclaimer in the documentation
+#       and/or other materials provided with the distribution.
+#     * Neither the name of AutumnBot nor the names of its contributors
+#       may be used to endorse or promote products derived from this software
+#       without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/autumnbot/services/text_to_speak/text_to_speak.py b/autumnbot/services/text_to_speak/text_to_speak.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2024 Muqiu Han
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+#     * Redistributions of source code must retain the above copyright notice,
+#       this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright notice,
+#       this list of conditions and the following disclaimer in the documentation
+#       and/or other materials provided with the distribution.
+#     * Neither the name of AutumnBot nor the names of its contributors
+#       may be used to endorse or promote products derived from this software
+#       without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from .. import service
+from preimport import *
+
+import pyttsx3
+
+
+# Use pyttsx3 to convert text to speech. On Linux, pyttsx3 calls espeak.
+class TextToSpeak(service.Service):
+    CLASS_NAME: str = "TextToSpeak"
+
+    __engine: pyttsx3.Engine
+
+    def __init__(self) -> None:
+        super().__init__()
+
+        self.info("initialize")
+        self.__engine = pyttsx3.init()
+
+        # Select the "zh" voice so that pyttsx3 can speak Chinese
+        self.__engine.setProperty("voice", "zh")
+
+    def on_start(self) -> None:
+        super().on_start()
+        self.info("start")
+
+    def on_stop(self) -> None:
+        super().on_stop()
+        self.info("stop")
+
+    def on_receive(self, message: str, now: bool = True) -> None:
+        self.info("request text to speak")
+        if now:
+            self.__engine.say(message)
+            self.__engine.runAndWait()