inital commit
This commit is contained in:
commit
6f9cc3a2c4
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
speech/
|
||||
__pycache__
|
||||
audio*
|
||||
*.wav
|
||||
*.mp3
|
21
README.md
Normal file
21
README.md
Normal file
|
@ -0,0 +1,21 @@
|
|||
# Podcast filter
|
||||
This program takes a wav file and produces the transcript of the audio file. The goal is to be able to filter the parts of a podcast that you're interested in by using a keyword list. But it's still a work in progress.
|
||||
|
||||
## Installation on GNU/Linux
|
||||
### Step 1
|
||||
Clone the repository
|
||||
|
||||
### Step 2
|
||||
Go to the folder of the repository and create a virtual environment
|
||||
```bash
|
||||
python -m venv <name>
|
||||
```
|
||||
|
||||
### Step 3
|
||||
Install the requirements (there's a ton because we use the whisper engine for speech to text)
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Usage
|
||||
Change the name of the file you want to transcribe in the code, and make sure it's in the same folder as the program (or give its path in the code). Run the code with `python app.py`. Enjoy the transcript.
|
53
app.py
Normal file
53
app.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
import speech_recognition as sr
|
||||
import os
|
||||
from pydub import AudioSegment
|
||||
from pydub.silence import split_on_silence
|
||||
|
||||
# sound = AudioSegment.from_mp3("test.mp3")
|
||||
# sound.export("test.wav", format="wav")
|
||||
|
||||
# Audio file to transcribe; passed to large_audio() when run as a script.
fname = "ciberseguretat.wav"
|
||||
# Keywords of interest. NOTE(review): not referenced anywhere else in the
# visible code -- presumably reserved for the planned keyword filtering.
keyWords = ['ciberseguretat', 'hacker', 'atac', 'pentesting']
|
||||
|
||||
# Shared recognizer instance used by transcript_audio() for every call.
r = sr.Recognizer()
|
||||
|
||||
def transcript_audio(audio):
    """Transcribe a single audio file to text with the Whisper engine.

    Args:
        audio: Path of the audio file to transcribe (handed to
            ``sr.AudioFile``).

    Returns:
        The text returned by ``Recognizer.recognize_whisper`` for the whole
        file, with Catalan (``'ca'``) requested as the language.
    """
    # Open the file that was passed in, not the module-level `fname`;
    # otherwise every chunk handed over by the caller would be transcribed
    # as the complete input file instead of as that chunk.
    with sr.AudioFile(audio) as source:
        audio_data = r.record(source)
    return r.recognize_whisper(audio_data, language='ca')
|
||||
|
||||
def large_audio(path, minutes=5):
    """Transcribe a long audio file by splitting it into fixed-length chunks.

    The audio at ``path`` is cut into consecutive pieces of ``minutes``
    minutes each (the last piece may be shorter). Every piece is exported to
    ``audio-fixed-chunks/chunk<i>.wav`` and passed to ``transcript_audio``.

    Args:
        path: Audio file to load with ``AudioSegment.from_file``.
        minutes: Chunk length in minutes. May be fractional, but must amount
            to at least one millisecond.

    Returns:
        The transcripts of all recognized chunks, each capitalized and
        followed by ". ", concatenated in chunk order. Chunks that raise
        ``sr.UnknownValueError`` are reported on stdout and skipped.

    Raises:
        ValueError: If ``minutes`` results in a chunk length below 1 ms.
    """
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    # Fail fast with a clear message: a zero step would make range() raise an
    # opaque error, and a negative one would silently produce no chunks.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"minutes must yield a chunk of at least 1 ms, got {minutes!r}"
        )

    print("Loading file")
    sound = AudioSegment.from_file(path)
    print(len(sound))
    print("Splitting file")
    chunks = [
        sound[start:start + chunk_length_ms]
        for start in range(0, len(sound), chunk_length_ms)
    ]

    folder_name = "audio-fixed-chunks"
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(folder_name, exist_ok=True)

    transcripts = []
    print("Starting transcription")
    for i, audio_chunk in enumerate(chunks, start=1):
        # Export the chunk into the `folder_name` directory. export() hands
        # back the open output file, so close it explicitly.
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav").close()
        # Recognize the chunk; an unintelligible chunk is reported and skipped.
        try:
            text = transcript_audio(chunk_filename)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
        else:
            text = f"{text.capitalize()}. "
            print(chunk_filename, ":", text)
            transcripts.append(text)

    # Return the text for all chunks that were recognized.
    return "".join(transcripts)
|
||||
|
||||
# Script entry point: transcribe the configured file and print the result.
if __name__ == "__main__":
    transcript = large_audio(fname)
    print(transcript)
|
41
requirements.txt
Normal file
41
requirements.txt
Normal file
|
@ -0,0 +1,41 @@
|
|||
certifi==2023.7.22
|
||||
cffi==1.16.0
|
||||
charset-normalizer==3.3.0
|
||||
cmake==3.27.6
|
||||
filelock==3.12.4
|
||||
idna==3.4
|
||||
Jinja2==3.1.2
|
||||
lit==17.0.2
|
||||
llvmlite==0.41.0
|
||||
MarkupSafe==2.1.3
|
||||
more-itertools==10.1.0
|
||||
mpmath==1.3.0
|
||||
networkx==3.1
|
||||
numba==0.58.0
|
||||
numpy==1.25.2
|
||||
nvidia-cublas-cu11==11.10.3.66
|
||||
nvidia-cuda-cupti-cu11==11.7.101
|
||||
nvidia-cuda-nvrtc-cu11==11.7.99
|
||||
nvidia-cuda-runtime-cu11==11.7.99
|
||||
nvidia-cudnn-cu11==8.5.0.96
|
||||
nvidia-cufft-cu11==10.9.0.58
|
||||
nvidia-curand-cu11==10.2.10.91
|
||||
nvidia-cusolver-cu11==11.4.0.1
|
||||
nvidia-cusparse-cu11==11.7.4.91
|
||||
nvidia-nccl-cu11==2.14.3
|
||||
nvidia-nvtx-cu11==11.7.91
|
||||
openai-whisper @ git+https://github.com/openai/whisper.git@0a60fcaa9b86748389a656aa013c416030287d47
|
||||
pycparser==2.21
|
||||
pydub==0.25.1
|
||||
regex==2023.10.3
|
||||
requests==2.31.0
|
||||
six==1.16.0
|
||||
soundfile==0.12.1
|
||||
SpeechRecognition==3.10.0
|
||||
sympy==1.12
|
||||
tiktoken==0.3.3
|
||||
torch==2.0.1
|
||||
tqdm==4.66.1
|
||||
triton==2.0.0
|
||||
typing_extensions==4.8.0
|
||||
urllib3==2.0.6
|
Loading…
Reference in New Issue
Block a user