# Spaces: Sleeping
import os
import tempfile
import warnings
import zipfile
from xml.sax.saxutils import escape

import pdfplumber
import requests
import streamlit as st
import torch
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# Suppress warnings
warnings.filterwarnings("ignore")

# Setup models
device = "cuda:0" if torch.cuda.is_available() else "cpu"
whisper_model_id = "openai/whisper-medium"


@st.cache_resource
def _load_whisper(model_id, target_device):
    """Load the Whisper model, its processor and the ASR pipeline once.

    Streamlit re-executes the whole script on every user interaction, so
    building these objects at module top level would reload the model each
    time. ``st.cache_resource`` keeps a single shared instance per
    ``(model_id, target_device)`` pair for the life of the server process.

    Args:
        model_id: Hugging Face model identifier to load.
        target_device: Device string handed to the pipeline (for example
            ``"cuda:0"`` or ``"cpu"``).

    Returns:
        A ``(model, processor, pipeline)`` tuple.
    """
    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
    processor = AutoProcessor.from_pretrained(model_id)
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        device=target_device,
    )
    return model, processor, asr_pipeline


# Load Whisper model and processor, and create the Whisper pipeline.
# The same module-level names as before stay available to the rest of the file.
whisper_model, whisper_processor, whisper_pipe = _load_whisper(whisper_model_id, device)
# IBM Granite API URL and Headers
granite_url = "https://us-south.ml.cloud.ibm.com/ml/v1/text/generation?version=2023-05-29"
granite_headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    # Read the credential from the GRANITE_API_KEY environment variable so it
    # never has to be committed to source control. The original placeholder is
    # kept as the fallback, so an unconfigured deployment behaves as before.
    "Authorization": "Bearer " + os.environ.get("GRANITE_API_KEY", "YOUR_API_KEY_HERE"),
}
# Function to transcribe audio files
def transcribe_audio(file_path):
    """Run the module-level Whisper pipeline on one audio file.

    Args:
        file_path: Path of the audio file, passed straight to ``whisper_pipe``.

    Returns:
        The value stored under the ``'text'`` key of the pipeline result.
    """
    return whisper_pipe(file_path)['text']
# Function to extract text and questions from PDF
def extract_text_from_pdf(pdf_path):
    """Extract the text of a PDF and the non-empty lines used as form questions.

    Args:
        pdf_path: Anything ``pdfplumber.open`` accepts (the caller in this
            file passes the uploaded file object).

    Returns:
        A ``(text, questions)`` tuple. ``text`` is the text of every page that
        yielded any, joined with newlines so the last line of one page is not
        fused with the first line of the next. ``questions`` is the list of
        stripped, non-empty lines across those pages, in page order.
    """
    page_texts = []
    questions = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            # extract_text() can give nothing for a page (e.g. image-only); skip it.
            if not page_text:
                continue
            page_texts.append(page_text)
            questions.extend(
                line.strip() for line in page_text.split("\n") if line.strip()
            )
    return "\n".join(page_texts), questions
# Function to generate form data with Granite
def _build_granite_payload(text, questions):
    """Build the JSON body for the Granite text-generation request."""
    question_list = "\n".join(f"- {question}" for question in questions)
    # The same HAP moderation settings are applied to input and output.
    hap_settings = {
        "enabled": True,
        "threshold": 0.5,
        "mask": {"remove_entity_value": True},
    }
    return {
        "input": f"""The following text is a transcript from an audio recording. Read the text and extract the information needed to fill out the following form.\n\nText: {text}\n\nForm Questions:\n{question_list}\n\nExtracted Form Data:""",
        "parameters": {
            "decoding_method": "sample",
            "max_new_tokens": 900,
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 1,
            "repetition_penalty": 1.05,
        },
        "model_id": "ibm/granite-13b-chat-v2",
        # Project id is read from GRANITE_PROJECT_ID; the original placeholder
        # remains the fallback when the variable is not set.
        "project_id": os.environ.get("GRANITE_PROJECT_ID", "YOUR_PROJECT_ID"),
        "moderations": {
            "hap": {
                "input": dict(hap_settings, mask=dict(hap_settings["mask"])),
                "output": dict(hap_settings, mask=dict(hap_settings["mask"])),
            }
        },
    }


def generate_form_data(text, questions, timeout=120):
    """Ask the Granite model to fill in the form questions from a transcript.

    Args:
        text: Transcript text inserted into the prompt.
        questions: Iterable of form question strings, rendered as a bulleted
            list in the prompt.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely, which would
            freeze the app on a stalled connection.

    Returns:
        The stripped ``generated_text`` of the first entry in the response's
        ``results`` list.

    Raises:
        RuntimeError: If the service answers with a non-200 status code.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    body = _build_granite_payload(text, questions)
    response = requests.post(
        granite_url, headers=granite_headers, json=body, timeout=timeout
    )
    if response.status_code != 200:
        # RuntimeError is a subclass of Exception, so callers that catch
        # Exception continue to work.
        raise RuntimeError("Non-200 response: " + str(response.text))
    data = response.json()
    return data['results'][0]['generated_text'].strip()
# Function to save responses to PDF
def save_responses_to_pdf(responses, output_pdf_path):
    """Write the generated responses to a PDF, one numbered section per file.

    Args:
        responses: Iterable of response strings; each is rendered under a
            "File N:" heading, numbered from 1 in iteration order.
        output_pdf_path: Destination passed to ``SimpleDocTemplate``.
    """
    document = SimpleDocTemplate(output_pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()

    # Custom style for numbered responses
    number_style = ParagraphStyle(
        name='NumberedStyle',
        parent=styles['BodyText'],
        fontSize=10,
        spaceAfter=12,
    )

    content = []
    for index, response in enumerate(responses, start=1):
        heading = Paragraph(f"<b>File {index}:</b>", styles['Heading2'])
        # Paragraph parses its text as XML-like markup, so model output that
        # contains '<', '>' or '&' must be escaped first. Line breaks are
        # converted afterwards so the <br/> tags themselves are not escaped.
        safe_text = escape(response).replace("\n", "<br/>")
        content.extend(
            [
                heading,
                Spacer(1, 6),  # Space between heading and response
                Paragraph(safe_text, number_style),
                Spacer(1, 18),  # Space between responses
            ]
        )

    document.build(content)
# Streamlit Interface
st.title("Audio to Form Filling")
zip_file = st.file_uploader("Upload ZIP File with Audio Files", type="zip")
pdf_file = st.file_uploader("Upload PDF Form", type="pdf")

if zip_file and pdf_file:
    # The form is identical for every recording, so parse the PDF once
    # instead of once per audio file. Only the question list is used below.
    _, pdf_questions = extract_text_from_pdf(pdf_file)

    pdf_bytes = None
    with tempfile.TemporaryDirectory() as tmp_dir:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(tmp_dir)

        # os.listdir order is arbitrary; sort so the "File N" numbering is
        # stable. Extensions are matched case-insensitively (.WAV, .Mp3, ...).
        audio_names = sorted(
            name
            for name in os.listdir(tmp_dir)
            if name.lower().endswith((".wav", ".mp3"))
        )

        responses = []
        for filename in audio_names:
            file_path = os.path.join(tmp_dir, filename)
            # Transcribe audio
            transcribed_text = transcribe_audio(file_path)
            # Generate form data
            form_data = generate_form_data(transcribed_text, pdf_questions)
            responses.append(form_data)
            st.write(f"File {len(responses)}:\n{form_data}\n")  # Display the extracted form data with numbering

        if responses:
            # Build the PDF inside the temporary directory and keep only its
            # bytes, so no open handle or stray file is left behind.
            output_pdf_path = os.path.join(tmp_dir, "processed_output.pdf")
            save_responses_to_pdf(responses, output_pdf_path)
            with open(output_pdf_path, "rb") as f:
                pdf_bytes = f.read()

    if pdf_bytes is not None:
        # Provide a download button for the generated PDF
        st.download_button(
            label="Download Processed PDF",
            data=pdf_bytes,
            file_name="processed_output.pdf",
            mime="application/pdf",
        )
    else:
        st.warning("No .wav or .mp3 files were found in the uploaded ZIP.")