import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor, BitsAndBytesConfig, TextStreamer, pipeline

# Authenticate with the Hugging Face Hub (required for the gated Llama weights).
token = os.getenv("HF_TOKEN")
login(token=token)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def process_audio(audio_file):
    if audio_file is None:
        return "Error: No audio provided!"

    # Step 1: transcribe the audio with Whisper.
    asr_model = "openai/whisper-tiny"
    processor = AutoProcessor.from_pretrained(asr_model)

    transcriber = pipeline(
        "automatic-speech-recognition",
        model=asr_model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        device=0 if torch.cuda.is_available() else "cpu",
    )

    # return_timestamps=True lets the pipeline transcribe audio longer than 30 seconds.
    transcript = transcriber(audio_file, return_timestamps=True)["text"]

    # Free the ASR components before loading the LLM.
    del transcriber
    del processor

    # Step 2: load Llama 3.2 and generate the minutes.
    LLAMA = "meta-llama/Llama-3.2-3B-Instruct"
    llama_quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
    )

    tokenizer = AutoTokenizer.from_pretrained(LLAMA)
    tokenizer.pad_token = tokenizer.eos_token
    # 4-bit quantization (bitsandbytes) needs a CUDA GPU; fall back to full precision on CPU.
    if torch.cuda.is_available():
        model = AutoModelForCausalLM.from_pretrained(
            LLAMA,
            quantization_config=llama_quant_config,
            device_map="auto",
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
            LLAMA,
            torch_dtype=torch.float32,
            device_map="auto",
        )

    system_message = "You are an assistant that produces minutes of meetings from transcripts, with summary, key discussion points, takeaways and action items with owners, in markdown."
    user_prompt = f"Below is an extract of the transcript of a Denver council meeting. Please write minutes in markdown, including a summary with attendees, location and date; discussion points; takeaways; and action items with owners.\n{transcript}"

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
    ]

    # Build the prompt with the model's chat template and stream tokens to the console as they are generated.
    inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
    streamer = TextStreamer(tokenizer)
    outputs = model.generate(inputs, max_new_tokens=2000, streamer=streamer)

    # Return only the newly generated tokens, not the echoed prompt and transcript.
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)


interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
    outputs="text",
    title="Meeting Minutes Generator",
    description="Upload or record an audio file to get structured meeting minutes in Markdown.",
)

interface.launch()
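
# Usage sketch (assumptions: the script is saved as app.py -- the filename is not given in the
# source -- and ffmpeg is installed so the ASR pipeline can decode uploaded audio files):
#
#   export HF_TOKEN=hf_...   # token with access to the gated Llama 3.2 weights
#   python app.py            # then open the local URL that Gradio prints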