Sometimes working implementation
This commit is contained in:
commit
7ac6e97056
6 changed files with 126 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
__pycache__/
|
||||||
|
.vscode/
|
7
README.md
Normal file
7
README.md
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# Rominian
|
||||||
|
|
||||||
|
Converts speech to text and inserts the recognized text at the current cursor position. Recording is triggered with a key combination.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
`sudo apt-get install python3-pyaudio ffmpeg python3-tk python3-dev`
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
openai-whisper
|
||||||
|
SpeechRecognition
|
||||||
|
soundfile
|
||||||
|
keyboard
|
43
src/main.py
Normal file
43
src/main.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
from typer import insert_text_at_cursor, callback_for_keycombination, wait_for_callbacks
|
||||||
|
from speech import AudioProcessor
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
# Hotkey that is registered (on press) to start a recording.
LISTENING_KEYCOMBINATION: str = "ctrl"
# Hotkey that is registered (on release) to end the recording.
RELEASE_BTN: str = "ctrl"

# Let the root logger pass everything from DEBUG upwards.
logging.getLogger().setLevel(level=logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
def phrase_callback(phrase: str) -> None:
    """Insert a recognized phrase at the cursor, unless a recording is running.

    While ``audio_processor`` reports that it is listening the phrase is
    dropped here; otherwise it is handed to ``insert_text_at_cursor``.

    Args:
        phrase: Text of the phrase that was just recognized.
    """
    if not audio_processor.is_listening:
        insert_text_at_cursor(phrase)
|
||||||
|
|
||||||
|
def start_listening():
    """Hotkey handler: begin a recording if none is in progress.

    Does nothing when ``audio_processor`` is already listening, so repeated
    invocations while a recording is running are harmless.
    """
    already_recording = audio_processor.is_listening
    if not already_recording:
        logging.info(f'Listening... press [{RELEASE_BTN}] to stop.')
        audio_processor.start_recording()
|
||||||
|
|
||||||
|
def stop_listening():
    """Hotkey handler: end the running recording and type out its text.

    Does nothing when ``audio_processor`` is not listening. Otherwise the
    recording is stopped and the text collected so far is inserted at the
    cursor.
    """
    if audio_processor.is_listening:
        logging.info("Stopped listening.")
        audio_processor.stop_recording()

        transcript = audio_processor.get_last_text()
        insert_text_at_cursor(transcript)
|
||||||
|
|
||||||
|
|
||||||
|
# Shared processor instance used by the hotkey handlers defined above.
audio_processor: AudioProcessor = AudioProcessor(
    model="medium",
    phrase_callback=phrase_callback,
)

# Pressing the listening hotkey starts a recording ...
callback_for_keycombination(
    LISTENING_KEYCOMBINATION,
    start_listening,
    on_release=False,
)
# ... and releasing the release hotkey ends it.
callback_for_keycombination(
    RELEASE_BTN,
    stop_listening,
    on_release=True,
)

logging.info(f'Ready, wait for [{LISTENING_KEYCOMBINATION.upper()}]')
# Hand control to the hotkey machinery; the handlers do the rest.
wait_for_callbacks()
|
57
src/speech.py
Normal file
57
src/speech.py
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
import speech_recognition as sr
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
class AudioProcessor:
    """Record from the default microphone in the background and transcribe it.

    Audio chunks delivered by ``Recognizer.listen_in_background`` are passed
    to ``Recognizer.recognize_whisper``; the resulting phrases are joined with
    single spaces into ``last_text`` and optionally forwarded, one by one, to
    ``phrase_callback``.
    """

    def __init__(self, *, language: str = "german", model: str = "base", phrase_callback: callable = None) -> None:
        """Set up recognizer and microphone and calibrate for ambient noise.

        Args:
            language: Language passed on to ``recognize_whisper``.
            model: Whisper model name passed on to ``recognize_whisper``.
            phrase_callback: Optional callable invoked with every newly
                recognized phrase.
        """
        self.language: str = language
        self.model: str = model
        # Stays ``None`` until the first recording is started.
        self.last_text: str = None
        self.phrase_callback: callable = phrase_callback
        # Backing field for the read-only ``is_listening`` property. Storing
        # the flag under the same name as the accessor would shadow it.
        self._is_listening: bool = False
        self.stop_listening_caller = None

        # The microphone listing is only needed for the debug output below,
        # so skip the probe entirely when DEBUG logging is disabled.
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            logging.debug("Found the following microphones:")
            for index, name in sr.Microphone.list_working_microphones().items():
                logging.debug(f"Microphone with index {index} and name `{name}` found")

        self.recognizer: sr.Recognizer = sr.Recognizer()
        self.listener: sr.Microphone = sr.Microphone()
        with self.listener as source:
            # we only need to calibrate once, before we start listening
            self.recognizer.adjust_for_ambient_noise(source)

    def get_last_text(self) -> str:
        """Return the text collected since the last ``start_recording`` call.

        Returns ``None`` if no recording has been started yet.
        """
        return self.last_text

    @property
    def is_listening(self) -> bool:
        """Whether a background recording is currently active."""
        return self._is_listening

    def start_recording(self) -> None:
        """Clear the collected text and start listening in the background.

        Logs a warning and returns if a recording is already running.

        Raises:
            Exception: Whatever ``listen_in_background`` raises; the
                listening flag is reset before the error propagates.
        """
        if self._is_listening:
            logging.warning("Listener is already open")
            return

        self.last_text = ""
        self._is_listening = True

        try:
            self.stop_listening_caller = self.recognizer.listen_in_background(
                self.listener, self.listening_callback
            )
        except Exception:
            # Do not stay stuck in the "listening" state without a stopper.
            self._is_listening = False
            raise

    def listening_callback(self, recognizer, audio):
        """Transcribe one audio chunk and append it to the collected text.

        Args:
            recognizer: Recognizer handed over by ``listen_in_background``
                (unused; the instance's own recognizer is used).
            audio: Audio data to transcribe.
        """
        new_text = self.recognizer.recognize_whisper(
            audio, language=self.language, model=self.model
        )

        # Join phrases with a single space. An empty or not-yet-initialised
        # (``None``) buffer simply becomes the new phrase.
        if self.last_text:
            self.last_text = f"{self.last_text} {new_text}"
        else:
            self.last_text = new_text

        if self.phrase_callback:
            self.phrase_callback(new_text)

    def stop_recording(self) -> None:
        """Stop the background listener without waiting for it to finish.

        Logs a warning and returns if no recording is running.
        """
        if not self._is_listening:
            logging.warning("Listener is already closed")
            return

        self.stop_listening_caller(wait_for_stop=False)
        self.stop_listening_caller = None
        self._is_listening = False
|
13
src/typer.py
Normal file
13
src/typer.py
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
import keyboard
|
||||||
|
|
||||||
|
def insert_text_at_cursor(text: str) -> None:
    """Type ``text`` via ``keyboard.write``; do nothing for empty input.

    Args:
        text: Text to type. ``None`` and the empty string are ignored.
    """
    if not text:
        return

    keyboard.write(text)
|
||||||
|
|
||||||
|
def callback_for_keycombination(keycombination: str, callback: callable, *, on_release: bool = False):
    """Register ``callback`` as the handler for a hotkey.

    Args:
        keycombination: Hotkey description handed to ``keyboard.add_hotkey``.
        callback: Callable to register for the hotkey.
        on_release: Forwarded as ``trigger_on_release``.
    """
    keyboard.add_hotkey(
        hotkey=keycombination,
        callback=callback,
        trigger_on_release=on_release,
    )
|
||||||
|
|
||||||
|
def wait_for_callbacks():
    """Block in ``keyboard.wait`` without a hotkey so registered handlers can run."""
    # No hotkey is given, exactly as in a bare ``keyboard.wait()`` call.
    keyboard.wait(hotkey=None)
|
Loading…
Reference in a new issue