Spaces:
Build error
Build error
Commit
·
f14d11b
1
Parent(s):
e711356
Add other file
Browse files- utils/Dockerfile.txt +20 -0
- utils/model_names.txt +7 -0
- utils/model_names.yaml +42 -0
- utils/models.yaml +29 -0
- utils/oldmodel.py +47 -0
utils/Dockerfile.txt
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
|
3 |
+
WORKDIR /app
|
4 |
+
|
5 |
+
COPY requirements.txt ./requirements.txt
|
6 |
+
|
7 |
+
RUN apt-get update \
|
8 |
+
&& apt-get install libportaudio2 libportaudiocpp0 portaudio19-dev libsndfile1-dev -y \
|
9 |
+
&& pip3 install pyaudio
|
10 |
+
|
11 |
+
RUN pip install -r requirements.txt
|
12 |
+
|
13 |
+
EXPOSE 8501
|
14 |
+
|
15 |
+
WORKDIR /src
|
16 |
+
COPY . /src
|
17 |
+
|
18 |
+
ENTRYPOINT ["streamlit", "run"]
|
19 |
+
|
20 |
+
CMD ["src/main.py"]
|
utils/model_names.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
INSERT Hugging face models
|
2 |
+
1) Insert tokenizer model name
|
3 |
+
2) Insert space
|
4 |
+
3) Insert huggingface link to model name
|
5 |
+
|
6 |
+
speech_to_text
|
7 |
+
facebook/wav2vec2-base-960h https://huggingface.co/facebook/wav2vec2-base-960h
|
utils/model_names.yaml
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# models that generate text from audio data.
|
2 |
+
model_task: # model task
|
3 |
+
speech_to_text:
|
4 |
+
model_name: # model name
|
5 |
+
wav2vec:
|
6 |
+
model_size: # model size
|
7 |
+
base:
|
8 |
+
name: facebook/wav2vec2-base-960h
|
9 |
+
url: https://huggingface.co/facebook/wav2vec2-base-960h
|
10 |
+
year: 2020
|
11 |
+
whisper:
|
12 |
+
model_size:
|
13 |
+
tiny:
|
14 |
+
name: openai/whisper-tiny
|
15 |
+
url: https://huggingface.co/openai/whisper-tiny
|
16 |
+
year: 2022
|
17 |
+
base:
|
18 |
+
name: openai/whisper-base
|
19 |
+
url: https://huggingface.co/openai/whisper-base
|
20 |
+
year: 2022
|
21 |
+
medium:
|
22 |
+
name: openai/whisper-medium
|
23 |
+
url: https://huggingface.co/openai/whisper-medium
|
24 |
+
year: 2022
|
25 |
+
|
26 |
+
# models that generate summaries from text data.
|
27 |
+
text_to_summary:
|
28 |
+
model_name:
|
29 |
+
bart:  # facebook/bart-large-cnn is a BART model, not BERT
|
30 |
+
model_size:
|
31 |
+
large:
|
32 |
+
name: facebook/bart-large-cnn
|
33 |
+
url: https://huggingface.co/facebook/bart-large-cnn
|
34 |
+
year: 2019
|
35 |
+
fbs: 31231
|
36 |
+
|
37 |
+
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
utils/models.yaml
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# models that generate text from audio data.
|
2 |
+
wav2vec:
|
3 |
+
task: speech_to_text  # speech recognition: audio in, text out
|
4 |
+
url: https://huggingface.co/facebook/wav2vec2-base-960h
|
5 |
+
|
6 |
+
wav2vec2:
|
7 |
+
task: speech_to_text  # speech recognition: audio in, text out
|
8 |
+
url: https://huggingface.co/yongjian/wav2vec2-large-a
|
9 |
+
|
10 |
+
whisper_tiny:
|
11 |
+
task: speech_to_text  # speech recognition: audio in, text out
|
12 |
+
url: https://huggingface.co/openai/whisper-tiny
|
13 |
+
description: "this is the smallest whisper model that will be used for cloud deployment"
|
14 |
+
year: 2022
|
15 |
+
|
16 |
+
whisper_base:
|
17 |
+
task: speech_to_text  # speech recognition: audio in, text out
|
18 |
+
url: https://huggingface.co/openai/whisper-base
|
19 |
+
year: 2022
|
20 |
+
|
21 |
+
whisper_medium:
|
22 |
+
task: speech_to_text  # speech recognition: audio in, text out
|
23 |
+
url: https://huggingface.co/openai/whisper-medium
|
24 |
+
year: 2022
|
25 |
+
|
26 |
+
bart_large:
|
27 |
+
task: text_to_summary
|
28 |
+
url: https://huggingface.co/facebook/bart-large-cnn
|
29 |
+
year: 2019  # BART was published in 2019
|
utils/oldmodel.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
'''
|
2 |
+
import torch
|
3 |
+
import torchaudio
|
4 |
+
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
5 |
+
import speech_recognition as sr
|
6 |
+
import io
|
7 |
+
from pydub import AudioSegment
|
8 |
+
import librosa
|
9 |
+
import whisper
|
10 |
+
from scipy.io import wavfile
|
11 |
+
from test import record_voice
|
12 |
+
|
13 |
+
model = Wav2Vec2ForCTC.from_pretrained(r'yongjian/wav2vec2-large-a') # Note: PyTorch Model
|
14 |
+
tokenizer = Wav2Vec2Processor.from_pretrained(r'yongjian/wav2vec2-large-a')
|
15 |
+
|
16 |
+
|
17 |
+
r = sr.Recognizer()
|
18 |
+
|
19 |
+
from transformers import pipeline
|
20 |
+
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
21 |
+
|
22 |
+
with sr.Microphone(sample_rate=16000) as source:
|
23 |
+
print("You can start speaking now")
|
24 |
+
record_voice()
|
25 |
+
x, _ = librosa.load("output.wav", sr=16000)  # resample to the 16 kHz rate declared to the tokenizer below (librosa defaults to 22050 Hz)
|
26 |
+
model_inputs = tokenizer(x, sampling_rate=16000, return_tensors="pt", padding=True)
|
27 |
+
logits = model(model_inputs.input_values, attention_mask=model_inputs.attention_mask).logits  # no .cuda() here: it ran after the forward pass (no speed-up) and raised on hosts without CUDA; move the model and inputs with .to("cuda") for GPU inference
|
28 |
+
pred_ids = torch.argmax(logits, dim=-1).cpu()
|
29 |
+
pred_text = tokenizer.batch_decode(pred_ids)
|
30 |
+
print(x[:10],x.shape)
|
31 |
+
print('Transcription:', pred_text)
|
32 |
+
|
33 |
+
model = whisper.load_model("base")
|
34 |
+
result = model.transcribe("output.wav")
|
35 |
+
print(result["text"])
|
36 |
+
summary_input = result["text"]
|
37 |
+
|
38 |
+
summary_output = (summarizer(summary_input, max_length=30, min_length=20, do_sample=False))
|
39 |
+
print(summary_output)
|
40 |
+
with open("raw_text.txt",'w',encoding = 'utf-8') as f:
|
41 |
+
f.write(summary_input)
|
42 |
+
f.close()
|
43 |
+
with open("summary_text.txt",'w',encoding = 'utf-8') as f:
|
44 |
+
f.write(summary_output[0]["summary_text"])
|
45 |
+
f.close()
|
46 |
+
|
47 |
+
'''
|