Spaces:

DiegoLigtenberg
/

realtimespeech

Build error

App Files Files Community

DiegoLigtenberg commited on Nov 6, 2022

Commit

f14d11b

1 Parent(s): e711356

Add ither file

Browse files

Files changed (5) hide show

utils/Dockerfile.txt +20 -0
utils/model_names.txt +7 -0
utils/model_names.yaml +42 -0
utils/models.yaml +29 -0
utils/oldmodel.py +47 -0

utils/Dockerfile.txt ADDED Viewed

	@@ -0,0 +1,20 @@

+# Image that launches the app with `streamlit run` on port 8501.
+FROM python:3.9
+WORKDIR /app
+COPY requirements.txt ./requirements.txt
+# System audio libraries (PortAudio, libsndfile) are installed before pyaudio.
+# The apt package lists are removed in the same layer so they are not baked
+# into the image; --no-cache-dir does the same for pip's download cache.
+RUN apt-get update \
+        && apt-get install libportaudio2 libportaudiocpp0 portaudio19-dev libsndfile1-dev -y \
+        && rm -rf /var/lib/apt/lists/* \
+        && pip3 install --no-cache-dir pyaudio
+RUN pip install --no-cache-dir -r requirements.txt
+EXPOSE 8501
+WORKDIR /src
+COPY . /src
+ENTRYPOINT ["streamlit", "run"]
+# Resolved relative to WORKDIR /src, i.e. /src/src/main.py.
+# NOTE(review): confirm the build context really contains src/main.py.
+CMD ["src/main.py"]

utils/model_names.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+INSERT Hugging Face models
+1) Insert tokenizer model name
+2) Insert space
+3) Insert Hugging Face link to model name
+speech_to_text
+facebook/wav2vec2-base-960h https://huggingface.co/facebook/wav2vec2-base-960h

utils/model_names.yaml ADDED Viewed

	@@ -0,0 +1,42 @@

+# Model catalogue, nested as: task -> model family -> model size -> {name, url, year}.
+model_task: # model task
+  speech_to_text: # models that generate text from audio data.
+    model_name: # model name
+      wav2vec:
+        model_size: # model size
+          base:
+            name: facebook/wav2vec2-base-960h # same identifier as the path part of `url`
+            url: https://huggingface.co/facebook/wav2vec2-base-960h
+            year: 2020
+      whisper:
+        model_size: # model size
+          tiny:
+            name: openai/whisper-tiny
+            url: https://huggingface.co/openai/whisper-tiny
+            year: 2022
+          base:
+            name: openai/whisper-base
+            url: https://huggingface.co/openai/whisper-base
+            year: 2022
+          medium:
+            name: openai/whisper-medium
+            url: https://huggingface.co/openai/whisper-medium
+            year: 2022
+  # models that generate summaries from text data.
+  text_to_summary:
+    model_name: # model name
+      bert: # NOTE(review): the only entry under this key is facebook/bart-large-cnn (BART, not BERT); confirm with consumers of this key before renaming
+        model_size: # model size
+          large:
+            name: facebook/bart-large-cnn
+            url: https://huggingface.co/facebook/bart-large-cnn
+            year: 2019
+            fbs: 31231 # NOTE(review): no other entry defines `fbs` and its meaning is not evident from this file; confirm it is intentional

utils/models.yaml ADDED Viewed

	@@ -0,0 +1,29 @@

+# Flat model catalogue: one top-level key per model, each with its task and
+# Hugging Face URL (plus optional description / year).
+#
+# models that generate text from audio data.
+# The task is speech_to_text (audio in, text out), matching the task name
+# used in model_names.yaml and model_names.txt.
+wav2vec:
+  task: speech_to_text
+  url: https://huggingface.co/facebook/wav2vec2-base-960h
+wav2vec2:
+  task: speech_to_text
+  url: https://huggingface.co/yongjian/wav2vec2-large-a
+whisper_tiny:
+  task: speech_to_text
+  url: https://huggingface.co/openai/whisper-tiny
+  description: "this is the smallest whisper model that will be used for cloud deployment"
+  year: 2022
+whisper_base:
+  task: speech_to_text
+  url: https://huggingface.co/openai/whisper-base
+  year: 2022
+whisper_medium:
+  task: speech_to_text
+  url: https://huggingface.co/openai/whisper-medium
+  year: 2022
+# models that generate summaries from text data.
+bart_large:
+  task: text_to_summary
+  url: https://huggingface.co/facebook/bart-large-cnn
+  # Aligned with the year recorded for this checkpoint in model_names.yaml.
+  year: 2019

utils/oldmodel.py ADDED Viewed

	@@ -0,0 +1,47 @@

+'''
+# Archived prototype, kept for reference only. The whole module is wrapped in
+# a string literal, so none of the code below runs on import.
+#
+# Flow: record audio to output.wav, transcribe it with wav2vec2 and with
+# Whisper, summarise the Whisper transcript with BART, then write the raw
+# transcript and the summary to text files.
+import torch
+import torchaudio
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import speech_recognition as sr
+import io
+from pydub import AudioSegment
+import librosa
+import whisper
+from scipy.io import wavfile
+from test import record_voice
+from transformers import pipeline
+
+# Pick the device once; the forward pass itself has to run there for the GPU
+# to help, and the script must still work on CPU-only machines.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = Wav2Vec2ForCTC.from_pretrained(r'yongjian/wav2vec2-large-a').to(device) # Note: PyTorch Model
+tokenizer = Wav2Vec2Processor.from_pretrained(r'yongjian/wav2vec2-large-a')
+r = sr.Recognizer()
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+with sr.Microphone(sample_rate=16000) as source:
+    print("You can start speaking now")
+    record_voice()
+    # Load at 16 kHz explicitly: librosa resamples to 22050 Hz by default,
+    # which would not match the sampling_rate passed to the processor below.
+    x,_ = librosa.load("output.wav", sr=16000)
+    model_inputs = tokenizer(x, sampling_rate=16000, return_tensors="pt", padding=True)
+    # Inputs are moved to the model's device before the forward pass.
+    logits = model(model_inputs.input_values.to(device), attention_mask=model_inputs.attention_mask.to(device)).logits
+    pred_ids = torch.argmax(logits, dim=-1).cpu()
+    pred_text = tokenizer.batch_decode(pred_ids)
+    print(x[:10],x.shape)
+    print('Transcription:', pred_text)
+    # Separate name so the wav2vec2 model above is not shadowed.
+    whisper_model = whisper.load_model("base")
+    result = whisper_model.transcribe("output.wav")
+    print(result["text"])
+    summary_input = result["text"]
+    summary_output = (summarizer(summary_input, max_length=30, min_length=20, do_sample=False))
+    print(summary_output)
+    # The with-blocks close the files; no explicit close() is needed.
+    with open("raw_text.txt",'w',encoding = 'utf-8') as f:
+        f.write(summary_input)
+    with open("summary_text.txt",'w',encoding = 'utf-8') as f:
+        f.write(summary_output[0]["summary_text"])
+'''