Spaces:
Sleeping
Sleeping
Commit
·
2cf872d
1
Parent(s):
21621d2
Update app.py
Browse files
app.py
CHANGED
@@ -4,14 +4,10 @@ import os
|
|
4 |
import streamlit as st
|
5 |
import transformers
|
6 |
from transformers import pipeline
|
7 |
-
import torch
|
8 |
from summarizer import Summarizer
|
|
|
9 |
import ffmpeg
|
10 |
import speech_recognition as sr
|
11 |
-
from pytube import YouTube
|
12 |
-
import pandas as pd
|
13 |
-
import string
|
14 |
-
import whisper
|
15 |
from moviepy.editor import AudioFileClip
|
16 |
|
17 |
#Download youtube video
|
@@ -19,22 +15,18 @@ def download_audio(link):
|
|
19 |
with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
|
20 |
video.download(link)
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
26 |
|
27 |
#def portuguese_sum_pipeline(link):
|
28 |
# video_title = download_audio(link)
|
29 |
|
30 |
def english_sum_pipeline(link):
|
31 |
download_audio(link)
|
32 |
-
|
33 |
-
#audio-to-text
|
34 |
-
transcriptor = whisper.load_model("base.en")
|
35 |
-
directory = os.getcwd()
|
36 |
-
result = transcriptor.transcribe(os.path.join(directory, "video.mp3"))
|
37 |
-
transcript_text = result["text"]
|
38 |
|
39 |
#extractive summarization
|
40 |
extractive_model = Summarizer()
|
@@ -51,9 +43,7 @@ def english_qa_pipeline(question, context):
|
|
51 |
nlp = pipeline("question-answering", model='distilbert-base-uncased-distilled-squad')
|
52 |
result = nlp(question=question, context=context)
|
53 |
return result["answer"]
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
#Collect inputs and create the interface
|
58 |
def main():
|
59 |
header = st.container()
|
|
|
4 |
import streamlit as st
|
5 |
import transformers
|
6 |
from transformers import pipeline
|
|
|
7 |
from summarizer import Summarizer
|
8 |
+
import torch
|
9 |
import ffmpeg
|
10 |
import speech_recognition as sr
|
|
|
|
|
|
|
|
|
11 |
from moviepy.editor import AudioFileClip
|
12 |
|
13 |
#Download youtube video
|
|
|
15 |
with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
|
16 |
video.download(link)
|
17 |
|
18 |
+
def get_transcription_en(mp3_file):
|
19 |
+
model = whisper.load_model("tiny.en")
|
20 |
+
directory = os.getcwd()
|
21 |
+
result = model.transcribe(os.path.join(directory, mp3_file))
|
22 |
+
return result["text"]
|
23 |
|
24 |
#def portuguese_sum_pipeline(link):
|
25 |
# video_title = download_audio(link)
|
26 |
|
27 |
def english_sum_pipeline(link):
|
28 |
download_audio(link)
|
29 |
+
transcript_text = get_transcription_en("video.mp3")
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
#extractive summarization
|
32 |
extractive_model = Summarizer()
|
|
|
43 |
nlp = pipeline("question-answering", model='distilbert-base-uncased-distilled-squad')
|
44 |
result = nlp(question=question, context=context)
|
45 |
return result["answer"]
|
46 |
+
|
|
|
|
|
47 |
#Collect inputs and create the interface
|
48 |
def main():
|
49 |
header = st.container()
|