From 6f9cc3a2c4804f1684d8d8e0d9ef1d25337d9ba4 Mon Sep 17 00:00:00 2001 From: xaloc Date: Wed, 4 Oct 2023 16:18:18 +0200 Subject: [PATCH] inital commit --- .gitignore | 5 +++++ README.md | 21 +++++++++++++++++++ app.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 41 +++++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 app.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a14fcc0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +speech/ +__pycache__ +audio* +*.wav +*.mp3 diff --git a/README.md b/README.md new file mode 100644 index 0000000..69a2589 --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +# Podcast filter +This program takes a WAV file and produces the transcript of the audio file. The goal is to be able to filter the parts of a podcast that you're interested in by using a keyword list. But it's still a work in progress. + +## Installation on GNU/Linux +### Step 1 +Clone the repository + +### Step 2 +Go to the folder of the repository and create a virtual environment +```bash +python -m venv venv +``` + +### Step 3 +Install the requirements (there are a lot of them because we use the Whisper engine for speech-to-text) +```bash +pip install -r requirements.txt +``` + +## Usage +Change the name of the file you want to transcribe in the code, make sure it's in the same folder as the program (or give the path in the code). Run the code ```python app.py```. Enjoy the transcript.
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..bb0e834
--- /dev/null
+++ b/app.py
@@ -0,0 +1,68 @@
+import speech_recognition as sr
+import os
+from pydub import AudioSegment
+from pydub.silence import split_on_silence
+
+# sound = AudioSegment.from_mp3("test.mp3")
+# sound.export("test.wav", format="wav")
+
+fname = "ciberseguretat.wav"
+keyWords = ['ciberseguretat', 'hacker', 'atac', 'pentesting']
+
+r = sr.Recognizer()
+
+def transcript_audio(audio):
+    """Transcribe one audio file with Whisper and return the text.
+
+    `audio` is the path of the file to transcribe. Recognition is
+    requested with the language code 'ca'.
+    """
+    # Read `audio` itself, not the global `fname`: otherwise every chunk
+    # exported by `large_audio` would re-transcribe the whole input file.
+    with sr.AudioFile(audio) as source:
+        audio_data = r.record(source)
+        text = r.recognize_whisper(audio_data, language='ca')
+    return text
+
+def large_audio(path, minutes=5):
+    """Transcribe a long audio file by splitting it into fixed-length chunks.
+
+    The file at `path` is cut into consecutive chunks of `minutes` minutes
+    (the last one may be shorter). Each chunk is exported as a WAV file into
+    the "audio-fixed-chunks" folder and passed to `transcript_audio`.
+    Returns the transcripts of all recognised chunks concatenated; chunks
+    that raise `sr.UnknownValueError` are reported and skipped.
+    Raises ValueError when `minutes` does not give a positive chunk length.
+    """
+    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
+    if chunk_length_ms <= 0:
+        raise ValueError("minutes must give a positive chunk length")
+    print("Loading file")
+    sound = AudioSegment.from_file(path)
+    print(len(sound))
+    print("Splitting file")
+    chunks = [sound[i:i + chunk_length_ms] for i in range(0, len(sound), chunk_length_ms)]
+    folder_name = "audio-fixed-chunks"
+    # exist_ok avoids the check-then-create race of isdir() + mkdir()
+    os.makedirs(folder_name, exist_ok=True)
+    transcripts = []
+    print("Starting transcription")
+    for i, audio_chunk in enumerate(chunks, start=1):
+        # export audio chunk and save it in
+        # the `folder_name` directory.
+        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
+        audio_chunk.export(chunk_filename, format="wav")
+        # recognize the chunk
+        try:
+            text = transcript_audio(chunk_filename)
+        except sr.UnknownValueError as e:
+            print("Error:", str(e))
+        else:
+            text = f"{text.capitalize()}. "
+            print(chunk_filename, ":", text)
+            transcripts.append(text)
+    # return the text for all chunks detected
+    return "".join(transcripts)
+
+if __name__ == "__main__":
+    print(large_audio(fname))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..be0bb3d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,41 @@
+certifi==2023.7.22
+cffi==1.16.0
+charset-normalizer==3.3.0
+cmake==3.27.6
+filelock==3.12.4
+idna==3.4
+Jinja2==3.1.2
+lit==17.0.2
+llvmlite==0.41.0
+MarkupSafe==2.1.3
+more-itertools==10.1.0
+mpmath==1.3.0
+networkx==3.1
+numba==0.58.0
+numpy==1.25.2
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+openai-whisper @ git+https://github.com/openai/whisper.git@0a60fcaa9b86748389a656aa013c416030287d47
+pycparser==2.21
+pydub==0.25.1
+regex==2023.10.3
+requests==2.31.0
+six==1.16.0
+soundfile==0.12.1
+SpeechRecognition==3.10.0
+sympy==1.12
+tiktoken==0.3.3
+torch==2.0.1
+tqdm==4.66.1
+triton==2.0.0
+typing_extensions==4.8.0
+urllib3==2.0.6