# podcastFilter/app.py

import speech_recognition as sr
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence

# One-off conversion of an MP3 source to WAV (disabled, kept for reference):
# sound = AudioSegment.from_mp3("test.mp3")
# sound.export("test.wav", format="wav")

# Input WAV recording that the script transcribes.
fname = 'ciberseguretat.wav'
# Terms of interest; not referenced by the code in this part of the file.
keyWords = [
    "ciberseguretat",
    "hacker",
    "atac",
    "pentesting",
]

# Module-level recognizer instance, created once at import time and shared by
# the transcription code below.
r = sr.Recognizer()

def transcript_audio(audio):
    """Transcribe one audio file to text using the shared recognizer.

    Args:
        audio: Audio file to read; passed straight to ``sr.AudioFile``.

    Returns:
        The text produced by ``recognize_whisper`` for the whole file,
        requested with ``language='ca'`` (Catalan language code).
    """
    # Open the file given by the caller. The previous version ignored the
    # ``audio`` argument and always opened the module-level ``fname``, so
    # every chunk was transcribed from the same full-length input file.
    with sr.AudioFile(audio) as source:
        audio_data = r.record(source)
        text = r.recognize_whisper(audio_data, language='ca')
    return text

def large_audio(path, minutes=5):
    """Transcribe a long audio file by splitting it into fixed-length chunks.

    The file is loaded with pydub and cut into consecutive slices of
    ``minutes`` minutes (the last slice may be shorter). Each slice is
    exported to ``audio-fixed-chunks/chunk<N>.wav`` and handed to
    ``transcript_audio``.

    Args:
        path: Audio file to load; passed to ``AudioSegment.from_file``.
        minutes: Length of each chunk in minutes. May be fractional, but
            must come out to at least one millisecond.

    Returns:
        The transcripts of all chunks concatenated in order, each one
        passed through ``str.capitalize`` and followed by ". ". Chunks whose
        transcription raises ``sr.UnknownValueError`` are reported on stdout
        and skipped.

    Raises:
        ValueError: If ``minutes`` does not yield a positive chunk length.
    """
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    if chunk_length_ms <= 0:
        # A zero step makes range() fail with an unrelated-looking message,
        # and a negative step silently yields no chunks (empty transcript),
        # so reject both before doing any expensive work.
        raise ValueError(
            f"minutes must yield a positive chunk length, got {minutes!r}"
        )

    print("Loading file")
    sound = AudioSegment.from_file(path)
    print(len(sound))
    print("Splitting file")
    chunks = [
        sound[start:start + chunk_length_ms]
        for start in range(0, len(sound), chunk_length_ms)
    ]

    folder_name = "audio-fixed-chunks"
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(folder_name, exist_ok=True)

    print("Starting transcription")
    total_chunks = len(chunks)
    # Collect the pieces and join once instead of growing a string in a loop.
    pieces = []
    for i, audio_chunk in enumerate(chunks, start=1):
        print(f"Chunk {i} of {total_chunks}")
        # Export the chunk into `folder_name` so it can be read back as a file.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # Recognize the chunk; an unrecognizable chunk is skipped, not fatal.
        try:
            text = transcript_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            pieces.append(f"{text.capitalize()}. ")
    # Text for all chunks that were successfully transcribed.
    return "".join(pieces)

if __name__ == "__main__":
    # Transcribe the configured recording and save the result as plain text.
    text = large_audio(fname)
    # Keep the output path in its own name: rebinding the module-level
    # ``fname`` would silently change the input file for any later reader.
    out_fname = "transcript.txt"
    # Explicit UTF-8 so non-ASCII characters in the transcript do not depend
    # on the platform's default encoding.
    with open(out_fname, 'w', encoding="utf-8") as f:
        # ``text`` is a single string; writelines() would iterate it one
        # character at a time, write() emits it in one call.
        f.write(text)