Kr08 committed on
Commit
64f2bf5
·
verified ·
1 Parent(s): fd470bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
- from audio_processing import process_audio, print_results
3
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForQuestionAnswering
4
  import spaces
5
  import torch
6
 
@@ -8,14 +8,18 @@ import torch
8
  cuda_available = torch.cuda.is_available()
9
  device = "cuda" if cuda_available else "cpu"
10
 
11
- # Initialize the summarization and question-answering models
 
 
 
12
  summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
13
  summarizer_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
14
 
15
  qa_model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad").to(device)
16
  qa_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad")
 
17
 
18
- @spaces.GPU(duration=120)
19
  def transcribe_audio(audio_file, translate, model_size):
20
  language_segments, final_segments = process_audio(audio_file, translate=translate, model_size=model_size)
21
 
@@ -38,14 +42,14 @@ def transcribe_audio(audio_file, translate, model_size):
38
 
39
  return output, full_text
40
 
41
- @spaces.GPU(duration=120)
42
  def summarize_text(text):
43
  inputs = summarizer_tokenizer(text, max_length=1024, truncation=True, return_tensors="pt").to(device)
44
  summary_ids = summarizer_model.generate(inputs["input_ids"], max_length=150, min_length=50, do_sample=False)
45
  summary = summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
46
  return summary
47
 
48
- @spaces.GPU(duration=120)
49
  def answer_question(context, question):
50
  inputs = qa_tokenizer(question, context, return_tensors="pt").to(device)
51
  outputs = qa_model(**inputs)
@@ -54,13 +58,13 @@ def answer_question(context, question):
54
  answer = qa_tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
55
  return answer
56
 
57
- @spaces.GPU(duration=120)
58
  def process_and_summarize(audio_file, translate, model_size):
59
  transcription, full_text = transcribe_audio(audio_file, translate, model_size)
60
  summary = summarize_text(full_text)
61
  return transcription, summary
62
 
63
- @spaces.GPU(duration=120)
64
  def qa_interface(audio_file, translate, model_size, question):
65
  _, full_text = transcribe_audio(audio_file, translate, model_size)
66
  answer = answer_question(full_text, question)
 
1
  import gradio as gr
2
+ from audio_processing import process_audio, print_results, load_models
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForQuestionAnswering
4
  import spaces
5
  import torch
6
 
 
8
  cuda_available = torch.cuda.is_available()
9
  device = "cuda" if cuda_available else "cpu"
10
 
11
+ # Load models globally
12
+ print("Loading models...")
13
+ load_models() # Load Whisper and diarization models
14
+
15
  summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
16
  summarizer_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
17
 
18
  qa_model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad").to(device)
19
  qa_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad")
20
+ print("Models loaded successfully.")
21
 
22
+ @spaces.GPU
23
  def transcribe_audio(audio_file, translate, model_size):
24
  language_segments, final_segments = process_audio(audio_file, translate=translate, model_size=model_size)
25
 
 
42
 
43
  return output, full_text
44
 
45
+ @spaces.GPU
46
  def summarize_text(text):
47
  inputs = summarizer_tokenizer(text, max_length=1024, truncation=True, return_tensors="pt").to(device)
48
  summary_ids = summarizer_model.generate(inputs["input_ids"], max_length=150, min_length=50, do_sample=False)
49
  summary = summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
50
  return summary
51
 
52
+ @spaces.GPU
53
  def answer_question(context, question):
54
  inputs = qa_tokenizer(question, context, return_tensors="pt").to(device)
55
  outputs = qa_model(**inputs)
 
58
  answer = qa_tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
59
  return answer
60
 
61
+ @spaces.GPU
62
  def process_and_summarize(audio_file, translate, model_size):
63
  transcription, full_text = transcribe_audio(audio_file, translate, model_size)
64
  summary = summarize_text(full_text)
65
  return transcription, summary
66
 
67
+ @spaces.GPU
68
  def qa_interface(audio_file, translate, model_size, question):
69
  _, full_text = transcribe_audio(audio_file, translate, model_size)
70
  answer = answer_question(full_text, question)