1 |
import streamlit as st
2 |
import requests
3 |
import pdfplumber
4 |
import torch
5 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
6 |
from reportlab.lib.pagesizes import letter
7 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
8 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
9 |
from io import BytesIO
10 |
import os
11 |
12 |
# Suppress warnings
13 |
import warnings
14 |
15 |
16 |
# Setup models
30 |
31 |
32 |
33 |
# Granite (IBM watsonx) endpoint configuration.
#
# Credentials are read from the environment rather than embedded in source:
# a bearer token committed to the file is readable by anyone with access to
# the code, and IAM access tokens are short-lived, so a pasted token stops
# working shortly after it is issued. Any token that was previously committed
# here should be treated as compromised and revoked.
#
#   GRANITE_URL        - full URL of the text-generation endpoint
#   GRANITE_API_TOKEN  - current IAM access token (without the "Bearer " prefix)
#
# Both default to an empty string, so importing the module never fails; the
# request itself will be rejected if they are left unset.
granite_url = os.environ.get("GRANITE_URL", "")
granite_headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.environ.get('GRANITE_API_TOKEN', '')}",
}
38 |
39 |
40 |
41 |
42 |
# Function to transcribe audio files
43 |
def transcribe_audio(file):
    """Transcribe one audio input and return the recognized text.

    Args:
        file: The audio to transcribe. May be a file-like object (anything
            exposing ``getvalue()`` or ``read()``, such as an uploaded file),
            or any value the speech pipeline accepts directly (for example a
            path string or raw ``bytes``).

    Returns:
        The value stored under the ``'text'`` key of the pipeline result.
    """
    # NOTE(review): whisper_pipe is defined elsewhere in this file; it is
    # presumably a transformers automatic-speech-recognition pipeline, which
    # accepts paths, bytes or arrays but not file-like objects - confirm.
    if hasattr(file, "getvalue"):
        # getvalue() returns the whole buffer regardless of the current read
        # position, so a previously consumed upload is still transcribed.
        audio = file.getvalue()
    elif hasattr(file, "read"):
        audio = file.read()
    else:
        audio = file

    result = whisper_pipe(audio)
    return result['text']
46 |
47 |
# Function to extract text and questions from PDF
48 |
def extract_text_from_pdf(pdf_file):
49 |
text = ""
50 |
questions = []
51 |
with as pdf:
52 |
for page in pdf.pages:
53 |
page_text = page.extract_text()
54 |
if page_text:
70 |
"repetition_penalty": 1.05
71 |
72 |
"model_id": "ibm/granite-13b-chat-v2",
73 |
"project_id": "698f0da7-6b34-4642-8540-978e70e85c8e", # Replace with your actual project ID
74 |
"moderations": {
75 |
"hap": {
76 |
"input": {
93 |
return data['results'][0]['generated_text'].strip()
94 |
95 |
# Function to save responses to PDF
96 |
def save_responses_to_pdf(responses):
97 |
buffer = BytesIO()
98 |
document = SimpleDocTemplate(buffer, pagesize=letter)
99 |
styles = getSampleStyleSheet()
100 |
101 |
# Custom style for numbered responses
119 |
content.append(Spacer(1, 18)) # Space between responses
120 |
121 |
122 |
123 |
return buffer
124 |
125 |
# Streamlit app
126 |
st.title("FILL IT: By Umar Majeed")
127 |
128 |
uploaded_audio_files = st.file_uploader("Upload audio files", type=["wav", "mp3"], accept_multiple_files=True)
129 |
uploaded_pdf = st.file_uploader("Upload PDF form", type=["pdf"])
130 |
131 |
if uploaded_audio_files and uploaded_pdf:
132 |
responses = []
133 |
134 |
for audio_file in uploaded_audio_files:
135 |
# Transcribe audio
136 |
transcribed_text = transcribe_audio(audio_file)
137 |
# Extract text and form fields from PDF
138 |
pdf_text, pdf_questions = extract_text_from_pdf(uploaded_pdf)
139 |
# Generate form data
140 |
form_data = generate_form_data(transcribed_text, pdf_questions)
141 |
142 |
st.write(f"Extracted form data for {}:")
143 |
144 |
145 |
if responses:
146 |
# Save responses to PDF
147 |
response_pdf_buffer = save_responses_to_pdf(responses)
148 |
149 |
label="Download Response PDF",
150 |
151 |
152 |
153 |