commit 7ac6e97056e9a9aa2a019a58280ca90464b83976
Author: Maximilian Giller
Date:   Sat Oct 14 19:15:39 2023 +0200

    Sometimes working implementation

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..38da6d9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+.vscode/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4948ccf
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+# Rominian
+
+Converts speech to text and inserts text at cursor. Uses a button combination to trigger.
+
+## Setup
+
+`sudo apt-get install python3-pyaudio ffmpeg python3-tk python3-dev`
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e69fde0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+openai-whisper
+SpeechRecognition
+soundfile
+keyboard
\ No newline at end of file
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..93c7d5c
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,43 @@
+from typer import insert_text_at_cursor, callback_for_keycombination, wait_for_callbacks
+from speech import AudioProcessor
+import logging
+
+
+LISTENING_KEYCOMBINATION: str = "ctrl"
+RELEASE_BTN: str = "ctrl"
+
+logging.getLogger().setLevel(logging.DEBUG)
+
+
+def phrase_callback(phrase: str) -> None:
+    if audio_processor.is_listening:
+        return
+
+    insert_text_at_cursor(phrase)
+
+def start_listening():
+    if audio_processor.is_listening:
+        return
+
+    logging.info(f'Listening... press [{RELEASE_BTN}] to stop.')
+    audio_processor.start_recording()
+
+def stop_listening():
+    if not audio_processor.is_listening:
+        return
+
+    logging.info("Stopped listening.")
+    audio_processor.stop_recording()
+
+    insert_text_at_cursor(audio_processor.get_last_text())
+
+
+audio_processor: AudioProcessor = AudioProcessor(model="medium", phrase_callback=phrase_callback)
+
+
+callback_for_keycombination(LISTENING_KEYCOMBINATION, start_listening, on_release=False)
+callback_for_keycombination(RELEASE_BTN, stop_listening, on_release=True)
+
+
+logging.info(f'Ready, wait for [{LISTENING_KEYCOMBINATION.upper()}]')
+wait_for_callbacks()
diff --git a/src/speech.py b/src/speech.py
new file mode 100644
index 0000000..fea7701
--- /dev/null
+++ b/src/speech.py
@@ -0,0 +1,85 @@
+import speech_recognition as sr
+import logging
+
+
+class AudioProcessor:
+    """Record microphone audio in the background and transcribe it with Whisper.
+
+    Every phrase handed to listening_callback() since the last
+    start_recording() call is appended to the text that get_last_text()
+    returns.
+    """
+
+    def __init__(self, *, language: str = "german", model: str = "base", phrase_callback: callable = None) -> None:
+        """Create the recognizer and microphone and calibrate for ambient noise.
+
+        :param language: language passed to recognize_whisper().
+        :param model: Whisper model name passed to recognize_whisper().
+        :param phrase_callback: optional callable invoked with each newly
+            transcribed phrase.
+        """
+        self.language: str = language
+        self.model: str = model
+        self.last_text: str = None
+        self.phrase_callback: callable = phrase_callback
+        # Backing field of the `is_listening` property; an instance attribute
+        # named `is_listening` would shadow any accessor of the same name.
+        self._is_listening: bool = False
+        self.stop_listening_caller = None
+
+        logging.debug("Found the following microphones:")
+        for index, name in sr.Microphone.list_working_microphones().items():
+            logging.debug(f"Microphone with index {index} and name `{name}` found")
+
+        self.recognizer: sr.Recognizer = sr.Recognizer()
+        self.listener: sr.Microphone = sr.Microphone()
+        with self.listener as source:
+            self.recognizer.adjust_for_ambient_noise(source)  # we only need to calibrate once, before we start listening
+
+    def get_last_text(self) -> str:
+        """Return the text accumulated since the last start_recording() call."""
+        return self.last_text
+
+    @property
+    def is_listening(self) -> bool:
+        """Whether a recording started by start_recording() is still active."""
+        return self._is_listening
+
+    def start_recording(self) -> None:
+        """Clear the accumulated text and start listening in the background."""
+        if self._is_listening:
+            logging.warning("Listener is already open")
+            return
+
+        self.last_text = ""
+        self._is_listening = True
+
+        # listen_in_background() hands back the callable that stops the listener.
+        self.stop_listening_caller = self.recognizer.listen_in_background(self.listener, self.listening_callback)
+
+    def listening_callback(self, recognizer, audio):
+        """Transcribe one captured phrase and append it to the accumulated text.
+
+        :param recognizer: recognizer passed in by the background listener;
+            unused, self.recognizer is used instead.
+        :param audio: captured audio data to transcribe.
+        """
+        new_text = self.recognizer.recognize_whisper(
+            audio, language=self.language, model=self.model
+        )
+
+        # Join phrases with one space. Written without `+=` so that a
+        # last_text of None (no recording started yet) cannot raise TypeError.
+        self.last_text = f"{self.last_text} {new_text}" if self.last_text else new_text
+
+        if self.phrase_callback:
+            self.phrase_callback(new_text)
+
+    def stop_recording(self) -> None:
+        """Stop the background listener without waiting for it to finish."""
+        if not self._is_listening:
+            logging.warning("Listener is already closed")
+            return
+
+        self.stop_listening_caller(wait_for_stop=False)
+        self._is_listening = False
diff --git a/src/typer.py b/src/typer.py
new file mode 100644
index 0000000..de0c1ae
--- /dev/null
+++ b/src/typer.py
@@ -0,0 +1,13 @@
+import keyboard
+
+def insert_text_at_cursor(text: str):
+    if text is None or text == "":
+        return
+
+    keyboard.write(text)
+
+def callback_for_keycombination(keycombination: str, callback: callable, *, on_release: bool = False):
+    keyboard.add_hotkey(keycombination, callback, trigger_on_release=on_release)
+
+def wait_for_callbacks():
+    keyboard.wait()
\ No newline at end of file