FILL-IT / app.py
umarmajeedofficial's picture
Update app.py
bc2ba25 verified
raw
history blame
5.78 kB
import streamlit as st
import zipfile
import tempfile
import requests
import pdfplumber
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import os
import warnings
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
# Suppress warnings
# NOTE: this silences every warning category for the whole process, including
# deprecation notices from the libraries imported above.
warnings.filterwarnings("ignore")

# Setup models
device = "cuda:0" if torch.cuda.is_available() else "cpu"
whisper_model_id = "openai/whisper-medium"


@st.cache_resource
def _load_whisper(model_id, target_device):
    """Load the Whisper model, its processor and an ASR pipeline exactly once.

    Streamlit re-executes this script from the top on every widget
    interaction. Without caching, the checkpoint would be re-read and
    re-initialised on each rerun; ``st.cache_resource`` keeps one shared
    instance per (model_id, target_device) pair for the server process.

    Args:
        model_id: Hugging Face model identifier to load.
        target_device: Device string handed to ``pipeline`` (e.g. "cpu").

    Returns:
        Tuple ``(model, processor, asr_pipeline)``.
    """
    model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
    processor = AutoProcessor.from_pretrained(model_id)
    asr_pipeline = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        # Whisper consumes 30-second windows; chunking lets the pipeline
        # transcribe recordings longer than that instead of truncating or
        # rejecting them.
        chunk_length_s=30,
        device=target_device,
    )
    return model, processor, asr_pipeline


# Load Whisper model and processor, and create the Whisper pipeline.
# The module-level names are kept so the rest of the file keeps working.
whisper_model, whisper_processor, whisper_pipe = _load_whisper(
    whisper_model_id, device
)
# IBM Granite API URL and Headers
granite_url = "https://us-south.ml.cloud.ibm.com/ml/v1/text/generation?version=2023-05-29"

# Keep the credential out of source control: read it from the
# GRANITE_API_KEY environment variable. The original placeholder is kept as
# the fallback so an unconfigured deployment behaves exactly as before
# (requests are sent with the placeholder and rejected by the service).
_granite_api_key = os.environ.get("GRANITE_API_KEY", "YOUR_API_KEY_HERE")

granite_headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Bearer {_granite_api_key}",
}
# Function to transcribe audio files
def transcribe_audio(file_path):
    """Transcribe one audio file with the module-level Whisper pipeline.

    Args:
        file_path: Location of the audio file, passed straight to
            ``whisper_pipe``.

    Returns:
        The value stored under the ``'text'`` key of the pipeline result.
    """
    return whisper_pipe(file_path)['text']
# Function to extract text and questions from PDF
def extract_text_from_pdf(pdf_path):
    """Extract the text of a PDF and the list of its non-empty lines.

    Args:
        pdf_path: Whatever ``pdfplumber.open`` accepts; the caller in this
            file passes the Streamlit upload object.

    Returns:
        Tuple ``(text, questions)``:
        * ``text`` - text of every page that yielded any, joined with a
          newline so the last line of one page is not glued to the first
          line of the next.
        * ``questions`` - every non-blank line of that text, stripped of
          surrounding whitespace, in document order. No attempt is made to
          tell real questions apart from other lines.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            # extract_text() may give None/"" for pages without a text layer.
            if page_text:
                page_texts.append(page_text)

    text = "\n".join(page_texts)
    stripped_lines = (line.strip() for line in text.split("\n"))
    questions = [line for line in stripped_lines if line]
    return text, questions
# Function to generate form data with Granite
def generate_form_data(text, questions, *, timeout=(10, 120)):
    """Ask the Granite text-generation endpoint to fill in the form.

    Builds a prompt from the transcript and the form lines, POSTs it to
    ``granite_url`` with ``granite_headers`` and returns the generated text.

    Args:
        text: Transcript the answers should be extracted from.
        questions: Iterable of form lines; each becomes a "- ..." bullet in
            the prompt.
        timeout: Forwarded to ``requests.post`` as ``(connect, read)``
            seconds. Without a timeout a stalled connection would block the
            app indefinitely.

    Returns:
        ``generated_text`` of the first result, stripped of surrounding
        whitespace.

    Raises:
        RuntimeError: The service answered with a non-200 status, or the
            response body did not contain a generated result.
        requests.exceptions.RequestException: Network failure or timeout.
    """
    question_list = "\n".join(f"- {question}" for question in questions)
    prompt = (
        "The following text is a transcript from an audio recording. "
        "Read the text and extract the information needed to fill out "
        "the following form.\n\n"
        f"Text: {text}\n\n"
        f"Form Questions:\n{question_list}\n\n"
        "Extracted Form Data:"
    )
    # Same moderation settings are applied to the prompt and to the output.
    hap_settings = {
        "enabled": True,
        "threshold": 0.5,
        "mask": {"remove_entity_value": True},
    }
    body = {
        "input": prompt,
        "parameters": {
            "decoding_method": "sample",
            "max_new_tokens": 900,
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 1,
            "repetition_penalty": 1.05,
        },
        "model_id": "ibm/granite-13b-chat-v2",
        # Read the project id from the environment; the original placeholder
        # remains the fallback so unconfigured behaviour is unchanged.
        "project_id": os.environ.get("GRANITE_PROJECT_ID", "YOUR_PROJECT_ID"),
        "moderations": {
            "hap": {
                "input": dict(hap_settings, mask=dict(hap_settings["mask"])),
                "output": dict(hap_settings, mask=dict(hap_settings["mask"])),
            }
        },
    }

    response = requests.post(
        granite_url, headers=granite_headers, json=body, timeout=timeout
    )
    if response.status_code != 200:
        raise RuntimeError("Non-200 response: " + str(response.text))

    data = response.json()
    try:
        generated = data['results'][0]['generated_text']
    except (KeyError, IndexError, TypeError) as err:
        # Surface the payload instead of an opaque KeyError/IndexError.
        raise RuntimeError(
            "Unexpected response payload: " + str(response.text)
        ) from err
    return generated.strip()
# Function to save responses to PDF
def save_responses_to_pdf(responses, output_pdf_path):
    """Write the responses to a letter-size PDF, one numbered section each.

    Every response is rendered as a "File N:" heading (N starts at 1)
    followed by the response body, with line breaks preserved.

    Args:
        responses: Iterable of response strings, in output order.
        output_pdf_path: Destination handed to ``SimpleDocTemplate``.

    Returns:
        None. The PDF is written to ``output_pdf_path``.
    """
    # Local import keeps this fix self-contained within the function.
    from xml.sax.saxutils import escape

    document = SimpleDocTemplate(output_pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()

    # Custom style for numbered responses
    number_style = ParagraphStyle(
        name='NumberedStyle',
        parent=styles['BodyText'],
        fontSize=10,
        spaceAfter=12,
    )

    content = []
    for index, response in enumerate(responses, start=1):
        # Paragraph parses its text as XML-like markup, so characters such as
        # "<", ">" and "&" in model output must be escaped first; only then
        # are newlines turned into real <br/> tags.
        body_markup = escape(response).replace("\n", "<br/>")
        content.extend([
            Paragraph(f"<b>File {index}:</b>", styles['Heading2']),
            Spacer(1, 6),   # Space between heading and response
            Paragraph(body_markup, number_style),
            Spacer(1, 18),  # Space between responses
        ])

    document.build(content)
# Streamlit Interface
# Flow: upload a ZIP of recordings and a PDF form -> transcribe each
# recording -> ask Granite to fill the form -> offer all answers as one PDF.
AUDIO_EXTENSIONS = (".wav", ".mp3")

st.title("Audio to Form Filling")
zip_file = st.file_uploader("Upload ZIP File with Audio Files", type="zip")
pdf_file = st.file_uploader("Upload PDF Form", type="pdf")

if zip_file and pdf_file:
    # The form is identical for every recording, so parse it once instead of
    # once per audio file.
    _, pdf_questions = extract_text_from_pdf(pdf_file)

    pdf_bytes = None
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Extract into a sub-directory so the generated PDF (written next to
        # it) can never collide with an archive member.
        extract_dir = os.path.join(tmp_dir, "audio")
        try:
            with zipfile.ZipFile(zip_file, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)
        except zipfile.BadZipFile:
            st.error("The uploaded file is not a valid ZIP archive.")
            st.stop()

        # Walk the whole tree: archives commonly wrap their files in a
        # folder, which a flat directory listing would miss.
        audio_paths = []
        for root, dirs, files in os.walk(extract_dir):
            # Skip macOS resource-fork metadata, which mirrors real file
            # names (e.g. "._clip.mp3") but is not audio.
            dirs[:] = [name for name in dirs if name != "__MACOSX"]
            for filename in files:
                if filename.startswith("._"):
                    continue
                if filename.lower().endswith(AUDIO_EXTENSIONS):
                    audio_paths.append(os.path.join(root, filename))
        # Sort so "File N" numbering is stable across runs and platforms.
        audio_paths.sort()

        responses = []
        for file_path in audio_paths:
            # Transcribe audio
            transcribed_text = transcribe_audio(file_path)
            # Generate form data
            form_data = generate_form_data(transcribed_text, pdf_questions)
            responses.append(form_data)
            st.write(f"File {len(responses)}:\n{form_data}\n")  # Display the extracted form data with numbering

        if responses:
            # Save all responses to a PDF inside the temporary directory so
            # it is cleaned up automatically, then keep only its bytes.
            output_pdf_path = os.path.join(tmp_dir, "processed_output.pdf")
            save_responses_to_pdf(responses, output_pdf_path)
            with open(output_pdf_path, "rb") as f:
                pdf_bytes = f.read()

    if pdf_bytes is None:
        st.warning("No .wav or .mp3 files were found in the uploaded ZIP.")
    else:
        # Provide a download button for the generated PDF
        st.download_button(
            label="Download Processed PDF",
            data=pdf_bytes,
            file_name="processed_output.pdf",
            mime="application/pdf",
        )