CLI to python code

***snippsat*** · Oct-20-2024, 10:10 AM

You can try this.
Make sure ffmpeg is installed and accessible in your system's PATH.
The script assumes that the incoming audio data is in f32le format, with a sample rate of 44100 Hz and a single (mono) channel,
matching your original ffmpeg command.

import socket
import subprocess

# Forward raw PCM datagrams received over UDP into ffmpeg's stdin so that
# ffmpeg writes them out as a WAV file.

UDP_IP = "0.0.0.0"
UDP_PORT = 3000

# A datagram longer than the receive buffer is silently truncated, which would
# drop samples and misalign the stream, so accept up to the 16-bit maximum.
RECV_BUFFER_SIZE = 65535

ffmpeg_cmd = [
    'ffmpeg',
    '-y',                  # Overwrite output files without asking
    '-f', 'f32le',         # Input format: 32-bit float little-endian PCM
    '-ar', '44100',        # Input sample rate: 44100 Hz
    '-ac', '1',            # Input channels: 1 (mono)
    '-i', 'pipe:',         # Input comes from a pipe
    '/home/xy/microphone.wav'
]

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
    # Bind before spawning ffmpeg so a port conflict fails without starting
    # a child process that would then have nothing to read.
    sock.bind((UDP_IP, UDP_PORT))
    ffmpeg_proc = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
    print(f"Listening on UDP port {UDP_PORT}")
    try:
        while True:
            data, _addr = sock.recvfrom(RECV_BUFFER_SIZE)
            ffmpeg_proc.stdin.write(data)
    except KeyboardInterrupt:
        print("Interrupted by user. Closing...")
    except BrokenPipeError:
        # ffmpeg closed its end of the pipe (e.g. it exited with an error).
        print("ffmpeg closed its input pipe. Closing...")
    finally:
        try:
            # Closing flushes any buffered bytes, which raises again if the
            # pipe is already broken; that must not prevent the wait below.
            ffmpeg_proc.stdin.close()
        except BrokenPipeError:
            pass
        ffmpeg_proc.wait()
finally:
    sock.close()

azxo1 · (This post was last modified: Oct-21-2024, 08:32 AM by azxo1.)

Your code works perfectly, thank you very much. But it is still not quite what I need. This is the original concept: an I2S microphone streams over UDP ("Udp.write((const uint8_t*) audio_block_float,bytesIn*2);"), and that stream should be passed to the recognizer in Python, continuously:

def udp_pcm_to_audio(data, *, channels=1, sample_width=2, sample_rate=16000):
    """Wrap raw PCM bytes in an in-memory WAV container.

    Args:
        data: Raw PCM sample bytes, written to the WAV data chunk unchanged.
        channels: Number of interleaved channels declared in the header.
        sample_width: Bytes per sample declared in the header.
        sample_rate: Frames per second declared in the header.

    Returns:
        An ``io.BytesIO`` holding the complete WAV file, rewound to offset 0
        so it can be handed straight to a reader.

    The defaults (mono, 16-bit, 16 kHz) reproduce the previously hard-coded
    layout. The bytes are not converted in any way, so the header values must
    describe the layout the sender actually transmits.
    NOTE(review): confirm the sender's sample format and rate; a mismatch
    yields a valid WAV file whose audio content is garbage.
    """
    wav_io = io.BytesIO()
    with wave.open(wav_io, 'wb') as wav_file:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname);
        # nframes is patched to the real count when the writer is closed.
        wav_file.setparams(
            (channels, sample_width, sample_rate, 0, 'NONE', 'NONE')
        )
        wav_file.writeframes(data)
    # Closing the wave writer leaves the caller-supplied buffer open.
    wav_io.seek(0)
    return wav_io

def listen1():
    while True:
        # UDP receive
        data, _ = sock.recvfrom(1024)
       
        # PCM data to object
        audio_data = udp_pcm_to_audio(data)

        # SpeechRecognition 
        with sr.AudioFile(audio_data) as source:
            audio = r.record(source)  
       
        try:
...

error: Unintelligible sound

Maybe it would be better to pass the UDP data to this instead: m = sr.Microphone(4)
Is that possible?

#!/usr/bin/env python3

import time
import sounddevice
import speech_recognition as sr

def callback(recognizer, audio):
    """Print the Hungarian transcription of one captured audio segment.

    Sends ``audio`` to ``recognizer.recognize_google`` with the language set
    to ``hu-HU`` (using the library's default API key; pass ``key=...`` to
    ``recognize_google`` to use another one) and prints the result.
    Nothing is printed when the call raises ``sr.UnknownValueError`` or
    ``sr.RequestError``.
    """
    try:
        transcript = recognizer.recognize_google(audio, language='hu-HU')
        print(transcript)
    except (sr.UnknownValueError, sr.RequestError):
        # Both failure kinds are intentionally ignored without any output.
        return


r = sr.Recognizer()
m = sr.Microphone(4)
with m as source:
        with sr.Microphone(device_index = 4, sample_rate = 44100, chunk_size=1024) as source:
......

CLI to python code

User Panel Messages

Announcements