EnzoBustos committed on
Commit
2cf872d
·
1 Parent(s): 21621d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -18
app.py CHANGED
@@ -4,14 +4,10 @@ import os
4
  import streamlit as st
5
  import transformers
6
  from transformers import pipeline
7
- import torch
8
  from summarizer import Summarizer
 
9
  import ffmpeg
10
  import speech_recognition as sr
11
- from pytube import YouTube
12
- import pandas as pd
13
- import string
14
- import whisper
15
  from moviepy.editor import AudioFileClip
16
 
17
  #Download youtube video
@@ -19,22 +15,18 @@ def download_audio(link):
19
  with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
20
  video.download(link)
21
 
22
- #Convert video format
23
- def convert_mp4_to_wav(mp4_file, wav_file):
24
- video = AudioFileClip(mp4_file)
25
- video.write_audiofile(wav_file)
 
26
 
27
  #def portuguese_sum_pipeline(link):
28
  # video_title = download_audio(link)
29
 
30
  def english_sum_pipeline(link):
31
  download_audio(link)
32
-
33
- #audio-to-text
34
- transcriptor = whisper.load_model("base.en")
35
- directory = os.getcwd()
36
- result = transcriptor.transcribe(os.path.join(directory, "video.mp3"))
37
- transcript_text = result["text"]
38
 
39
  #extractive summarization
40
  extractive_model = Summarizer()
@@ -51,9 +43,7 @@ def english_qa_pipeline(question, context):
51
  nlp = pipeline("question-answering", model='distilbert-base-uncased-distilled-squad')
52
  result = nlp(question=question, context=context)
53
  return result["answer"]
54
-
55
-
56
-
57
  #Collect inputs and create the interface
58
  def main():
59
  header = st.container()
 
4
  import streamlit as st
5
  import transformers
6
  from transformers import pipeline
 
7
  from summarizer import Summarizer
8
+ import torch
9
  import ffmpeg
10
  import speech_recognition as sr
 
 
 
 
11
  from moviepy.editor import AudioFileClip
12
 
13
  #Download youtube video
 
15
  with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'video.mp3'}) as video:
16
  video.download(link)
17
 
18
+ def get_transcription_en(mp3_file):
19
+ model = whisper.load_model("tiny.en")
20
+ directory = os.getcwd()
21
+ result = model.transcribe(os.path.join(directory, mp3_file))
22
+ return result["text"]
23
 
24
  #def portuguese_sum_pipeline(link):
25
  # video_title = download_audio(link)
26
 
27
  def english_sum_pipeline(link):
28
  download_audio(link)
29
+ transcript_text = get_transcription_en("video.mp3")
 
 
 
 
 
30
 
31
  #extractive summarization
32
  extractive_model = Summarizer()
 
43
  nlp = pipeline("question-answering", model='distilbert-base-uncased-distilled-squad')
44
  result = nlp(question=question, context=context)
45
  return result["answer"]
46
+
 
 
47
  #Collect inputs and create the interface
48
  def main():
49
  header = st.container()