Kr08 committed
Commit 3a346c4 · verified · Parent: a314490

Update app.py

Files changed (1): app.py +30 -14
app.py CHANGED
@@ -3,23 +3,40 @@ from audio_processing import process_audio, load_models
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForQuestionAnswering
 import spaces
 import torch
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)

 # Check if CUDA is available
 cuda_available = torch.cuda.is_available()
 device = "cuda" if cuda_available else "cpu"
+logger.info(f"Using device: {device}")

 # Load models globally
 print("Loading models...")
-load_models()  # Load Whisper and diarization models
-
-summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
-summarizer_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
-
-qa_model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad").to(device)
-qa_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad")
+try:
+    load_models()  # Load Whisper and diarization models
+except Exception as e:
+    logger.error(f"Error loading Whisper and diarization models: {str(e)}")
+    raise
+
+try:
+    summarizer_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn").to(device)
+    summarizer_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
+except Exception as e:
+    logger.error(f"Error loading summarization model: {str(e)}")
+    raise
+
+try:
+    qa_model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad").to(device)
+    qa_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad")
+except Exception as e:
+    logger.error(f"Error loading QA model: {str(e)}")
+    raise
+
 print("Models loaded successfully.")

-
 @spaces.GPU
 def transcribe_audio(audio_file, translate, model_size):
     language_segments, final_segments = process_audio(audio_file, translate=translate, model_size=model_size)
@@ -43,7 +60,6 @@ def transcribe_audio(audio_file, translate, model_size):

     return output, full_text

-
 @spaces.GPU
 def summarize_text(text):
     inputs = summarizer_tokenizer(text, max_length=1024, truncation=True, return_tensors="pt").to(device)
@@ -51,7 +67,6 @@ def summarize_text(text):
     summary = summarizer_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
     return summary

-
 @spaces.GPU
 def answer_question(context, question):
     inputs = qa_tokenizer(question, context, return_tensors="pt").to(device)
@@ -61,21 +76,18 @@ def answer_question(context, question):
     answer = qa_tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
     return answer

-
 @spaces.GPU
 def process_and_summarize(audio_file, translate, model_size):
     transcription, full_text = transcribe_audio(audio_file, translate, model_size)
     summary = summarize_text(full_text)
     return transcription, summary

-
 @spaces.GPU
 def qa_interface(audio_file, translate, model_size, question):
     _, full_text = transcribe_audio(audio_file, translate, model_size)
     answer = answer_question(full_text, question)
     return answer

-
 # Main interface
 with gr.Blocks() as iface:
     gr.Markdown("# WhisperX Audio Transcription, Translation, Summarization, and QA (with ZeroGPU support)")
@@ -109,10 +121,14 @@ with gr.Blocks() as iface:
     )

     gr.Markdown(
-        """
+        f"""
+        ## System Information
+        - Device: {device}
+        - CUDA Available: {"Yes" if cuda_available else "No"}
+
         ## ZeroGPU Support
         This application supports ZeroGPU for Hugging Face Spaces pro users.
-        GPU-intensive tasks are automatically optimized for better performance.
+        GPU-intensive tasks are automatically optimized for better performance when available.
         """
     )

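The main change is the same guard applied three times: each model load gets its own try/except that logs a descriptive error and then re-raises, so a failed download or missing weight stops the Space at startup with a readable log line instead of a bare traceback. A minimal sketch of that pattern factored into a helper — the `load_or_fail` name and this usage are illustrative assumptions, not part of the commit:

```python
import logging

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def load_or_fail(description, loader):
    """Call a zero-argument loader, logging a descriptive error on failure.

    Mirrors the try/except/raise blocks added in this commit: the original
    exception is re-raised, so the traceback is preserved while the log
    still says which of the loads failed.
    """
    try:
        return loader()
    except Exception as e:
        logger.error(f"Error loading {description}: {e}")
        raise


# Usage mirroring app.py's summarizer setup (device placement elided):
summarizer_model = load_or_fail(
    "summarization model",
    lambda: AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn"),
)
summarizer_tokenizer = load_or_fail(
    "summarization tokenizer",
    lambda: AutoTokenizer.from_pretrained("facebook/bart-large-cnn"),
)
```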
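A note on `@spaces.GPU`, which app.py applies to every GPU-bound function: on ZeroGPU Spaces the decorator allocates a GPU only for the duration of each decorated call and releases it afterwards; on ordinary hardware it is effectively a no-op, so the same code runs in both environments. A minimal sketch, assuming the Hugging Face `spaces` package available in ZeroGPU Spaces (the `duration` keyword is part of its documented API; the probe function itself is illustrative):

```python
import spaces
import torch


@spaces.GPU(duration=120)  # request a GPU slot for at most 120 s per call
def gpu_probe():
    # On ZeroGPU hardware, CUDA becomes visible inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.zeros(1, device=device).device


print(gpu_probe())  # cuda:0 on a ZeroGPU Space, cpu elsewhere
```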