palbha committed · verified
Commit f9c002c · 1 Parent(s): f395ca9

Update app.py

Files changed (1)
  1. app.py +23 -9
app.py CHANGED
@@ -1,31 +1,45 @@
 import gradio as gr
 import torch
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer, AutoModelForSpeechSeq2Seq
 
-# Whisper Model for Transcription
+# Whisper Model Optimization
 WHISPER_MODEL = "openai/whisper-large-v3"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
+whisper_quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_quant_type="nf4"
+)
+
+whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    WHISPER_MODEL,
+    device_map="auto",
+    quantization_config=whisper_quant_config
+)
+
+whisper_tokenizer = AutoTokenizer.from_pretrained(WHISPER_MODEL)
 transcriber = pipeline(
     task="automatic-speech-recognition",
-    model=WHISPER_MODEL,
+    model=whisper_model,
+    tokenizer=whisper_tokenizer,
     chunk_length_s=30,
-    device=DEVICE,
+    device=DEVICE
 )
 
-# LLaMA Model for Generating Meeting Minutes
-LLAMA = "meta-llama/Llama-2-7b-chat-hf"  # Change to your preferred model
-quant_config = BitsAndBytesConfig(
+# LLaMA Model Optimization
+LLAMA = "meta-llama/Llama-2-7b-chat-hf"
+llama_quant_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_use_double_quant=True,
     bnb_4bit_compute_dtype=torch.bfloat16,
     bnb_4bit_quant_type="nf4"
 )
 
-# Load Model & Tokenizer
 tokenizer = AutoTokenizer.from_pretrained(LLAMA)
 tokenizer.pad_token = tokenizer.eos_token
-model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map="auto", quantization_config=llama_quant_config)
+model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map="auto", quantization_config=llama_quant_config)
 
 # Function to Transcribe & Generate Minutes
 def process_audio(audio_file):
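
A note on the new pipeline wiring: once a model is loaded with device_map="auto" (as the quantized Whisper model is here), recent transformers versions reject a separate device= argument on pipeline(...), and an ASR pipeline built from a model instance also needs Whisper's feature extractor, not just its tokenizer. A minimal sketch of wiring that avoids both issues, reusing the same model name and 4-bit config as the commit (the variable names are this sketch's, not the repo's):

import torch
from transformers import (
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    BitsAndBytesConfig,
    pipeline,
)

WHISPER_MODEL = "openai/whisper-large-v3"

# Same 4-bit NF4 quantization config as in the commit.
whisper_quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
)

whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
    WHISPER_MODEL,
    device_map="auto",
    quantization_config=whisper_quant_config,
)

# The processor bundles both the tokenizer and the feature extractor;
# the ASR pipeline needs both to turn raw audio into model inputs.
processor = AutoProcessor.from_pretrained(WHISPER_MODEL)

# No device= here: the model is already dispatched by accelerate.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model=whisper_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    chunk_length_s=30,
)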
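
TextStreamer is imported but unused in the visible hunk; presumably process_audio uses it further down the file. For orientation only, a sketch of how the quantized LLaMA model and the module-level tokenizer/model globals from this file are typically used to draft minutes from a transcript; the prompt text, helper name, and max_new_tokens value are illustrative assumptions, not taken from the commit:

from transformers import TextStreamer

def generate_minutes(transcript: str) -> str:
    # Hypothetical prompt; the real one lives in process_audio.
    messages = [
        {"role": "user",
         "content": f"Write concise meeting minutes for this transcript:\n{transcript}"},
    ]
    # Llama-2-chat ships a chat template, so apply_chat_template
    # produces the [INST] ... [/INST] formatting for us.
    inputs = tokenizer.apply_chat_template(
        messages, return_tensors="pt"
    ).to(model.device)

    # Stream tokens to stdout as they are generated.
    streamer = TextStreamer(tokenizer, skip_prompt=True)
    outputs = model.generate(
        inputs,
        max_new_tokens=512,
        streamer=streamer,
    )
    # Return only the newly generated tokens, not the prompt.
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)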