EnzoBustos committed
Commit 73871bd · 1 Parent(s): 446d6c7

Update app.py

Files changed (1)
  1. app.py +20 -26
app.py CHANGED
@@ -1,3 +1,5 @@
+%%writefile app.py
+
 import yt_dlp
 import os
 import streamlit as st
@@ -14,21 +16,20 @@ st.set_page_config(
     page_title = "Turing Videos",
     page_icon = icon,
     layout = "wide",
-    initial_sidebar_state = "auto",
+    initial_sidebar_state = "auto",
 )
 
-#Download youtube video
-#@st.cache_data
+@st.cache_data
 def download_audio(link):
     with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
         video.download(link)
 
 #Load Whisper pipeline via HuggingFace
 @st.cache_resource
-def load_whisper(seconds):
+def load_whisper():
     return pipeline("automatic-speech-recognition",
                     model="openai/whisper-tiny",
-                    chunk_length_s=seconds,
+                    chunk_length_s=30,
     )
 
 #Load Extractive Summarizer pipeline via HuggingFace
@@ -44,7 +45,7 @@ def load_extractive():
 @st.cache_resource
 def load_qa():
     return pipeline("question-answering",
-                    model='distilbert-base-uncased-distilled-squad'
+                    model="rsvp-ai/bertserini-bert-base-squad"
     )
 
 #Download punkt function from nltk
@@ -54,34 +55,34 @@ def load_nltk():
 
 #Make the ASR task
 @st.cache_data
-def audio_speech_recognition(model_pipeline, video="video.mp3"):
-    return model_pipeline(video, batch_size=8)["text"].strip()
+def audio_speech_recognition(_model_pipeline, video="video.mp3"):
+    return _model_pipeline(video, batch_size=64)["text"].strip()
 
 #Make the Summarization task
 @st.cache_data
-def text_summarization(model_pipeline, full_text, ratio):
+def text_summarization(_model_pipeline, full_text, ratio):
     sentences = nltk.sent_tokenize(full_text)
-    extractive_sentences = model_pipeline({"sentences": sentences}, strategy="ratio", strategy_args=ratio)
+    extractive_sentences = _model_pipeline({"sentences": sentences}, strategy="ratio", strategy_args=ratio)
     extractive_text = " ".join(extractive_sentences[0])
     return extractive_text.strip()
 
 #Make the QA task
 @st.cache_data
-def answer_questions(model_pipeline, full_text, questionings):
+def answer_questions(_model_pipeline, full_text, questionings):
     answers = []
     for question in questionings:
-        result = model_pipeline(question=question, context=full_text)
+        result = _model_pipeline(question=question, context=full_text)
         answers.append(result["answer"])
     return answers
-
+
 def main():
 
     header = st.container()
     model = st.container()
     model_1, model_2 = st.columns(2)
-
+
     with st.sidebar:
-
+
         st.title(":red[Turing]Videos")
 
         with st.form("data_collection"):
@@ -102,20 +103,13 @@ def main():
                     height=50, placeholder="Digite suas perguntas..."
                 ).split(",")
 
-            seconds = st.select_slider(label="Digite a duração do seu vídeo para otimização:",
-                                       options = ["5 min", "15 min", "30 min", "45 min", "60 min"],
-                                       value = "15 min",
-            )
-
-            seconds = int(seconds.replace(" min", "")) * 60
-
             submitted = st.form_submit_button("Submit")
-
+
             if submitted:
                 st.success('Dados coletados!', icon="✅")
            else:
                 st.error('Dados ainda não coletados!', icon="🚨")
-
+
     with header:
         st.title(":red[Turing]Videos")
         st.subheader("Este projeto utiliza técnicas de inteligência artificial para simplificar e acelerar a compreensão de conteúdo audiovisual.",
@@ -129,7 +123,7 @@ def main():
         if language == "Inglês (en)":
             download_audio(link)
             load_nltk()
-            whisper = load_whisper(seconds)
+            whisper = load_whisper()
             extractive = load_extractive()
             qa_model = load_qa()
 
@@ -152,7 +146,7 @@ def main():
                 st.header("Resposta das perguntas:")
                 with st.spinner("Carregando respostas..."):
                     answers = answer_questions(qa_model, transcript_text, questions)
-
+
                 for i in range(len(answers)):
                     st.subheader(questions[i])
                     st.subheader(answers[i])
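
The change that repeats throughout this diff is the leading underscore on the pipeline parameters (model_pipeline → _model_pipeline). st.cache_data builds its cache key by hashing a function's arguments, and a transformers pipeline object is not hashable; naming a parameter with a leading underscore tells Streamlit to skip it when hashing. A minimal standalone sketch of that convention, reusing the function names and models already in the diff (the script below is illustrative only and not part of the commit):

# Minimal sketch (illustrative, not from the commit): why the cached task
# functions take "_model_pipeline" instead of "model_pipeline".
import streamlit as st
from transformers import pipeline

@st.cache_resource  # heavy model object: cache the resource itself
def load_whisper():
    return pipeline("automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    chunk_length_s=30)

@st.cache_data  # small, serializable result: cache the function's output
def audio_speech_recognition(_model_pipeline, video="video.mp3"):
    # The leading underscore excludes the unhashable pipeline from the cache
    # key; only "video" (a hashable string) determines cache hits.
    return _model_pipeline(video, batch_size=64)["text"].strip()

whisper = load_whisper()
st.write(audio_speech_recognition(whisper, video="video.mp3"))

The sketch assumes a local video.mp3, which in the app is produced by download_audio() before transcription.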