umarmajeedofficial committed
Commit a5cb220 · verified · 1 Parent(s): 13bd1b3

Create app.py

Files changed (1): app.py +142 -0
app.py ADDED
@@ -0,0 +1,142 @@
import warnings
import torch
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration, pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
import pdfplumber
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle

# Suppress warnings globally
warnings.filterwarnings("ignore")

# Set up models on GPU if available, otherwise CPU
device = "cuda:0" if torch.cuda.is_available() else "cpu"
whisper_model_id = "openai/whisper-medium"

# Load Whisper model and processor
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(whisper_model_id)
whisper_processor = AutoProcessor.from_pretrained(whisper_model_id)

# Create Whisper pipeline; chunk_length_s enables chunked inference so
# recordings longer than Whisper's 30-second window are transcribed in full
whisper_pipe = pipeline(
    "automatic-speech-recognition",
    model=whisper_model,
    tokenizer=whisper_processor.tokenizer,
    feature_extractor=whisper_processor.feature_extractor,
    chunk_length_s=30,
    device=device,
)

# Set up FLAN-T5 model and tokenizer; move the model to the same device as Whisper
flan_t5_model_id = "google/flan-t5-large"
flan_t5_tokenizer = T5Tokenizer.from_pretrained(flan_t5_model_id)
flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id).to(device)

# Function to transcribe audio files
def transcribe_audio(file_path):
    result = whisper_pipe(file_path)
    return result["text"]

# Function to extract text and questions from PDF
def extract_text_from_pdf(pdf_path):
    text = ""
    questions = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
                # Extract questions based on numbering
                lines = page_text.split("\n")
                for line in lines:
                    stripped = line.strip()
                    if stripped and stripped[0].isdigit():
                        questions.append(stripped)
    return text, questions

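# A quick sketch of how the numbering heuristic above behaves on hypothetical
# form lines (any line whose first non-blank character is a digit is kept):
#   "1. What is the patient's full name?"  -> collected as a question
#   "2024 Annual Intake Form"              -> also collected (false positive)
# Requiring a "." or ")" after the leading digits would filter out such
# headers if that proves to be a problem.
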
# Function to generate form data with FLAN-T5
def generate_form_data(text, questions):
    responses = []
    for question in questions:
        input_text = f"""The following text is a transcript from an audio recording. Read the text and answer the following question in a complete sentence.\n\nText: {text}\n\nQuestion: {question}\n\nAnswer:"""

        # Tokenize the input text and move it to the model's device
        inputs = flan_t5_tokenizer(input_text, return_tensors='pt', max_length=1024, truncation=True).to(device)

        # Generate the answer using the model
        with torch.no_grad():
            outputs = flan_t5_model.generate(**inputs, max_length=100)

        # Decode the generated text
        generated_text = flan_t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Handle incomplete or missing answers
        if not generated_text.strip():
            generated_text = "The answer to this question is not present in the script."
        elif len(generated_text.strip()) < 10:  # Arbitrary threshold for short/incomplete answers
            # Retry once with a prompt that asks for a more detailed answer
            input_text = f"""Based on the following transcript, provide a more detailed answer to the question.\n\nText: {text}\n\nQuestion: {question}\n\nAnswer:"""
            inputs = flan_t5_tokenizer(input_text, return_tensors='pt', max_length=1024, truncation=True).to(device)
            with torch.no_grad():
                outputs = flan_t5_model.generate(**inputs, max_length=100)
            generated_text = flan_t5_tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Append question and response
        responses.append(f"Question: {question}\nAnswer: {generated_text.strip()}")

    return "\n\n".join(responses)

# Function to save responses to PDF
def save_responses_to_pdf(responses, output_pdf_path):
    document = SimpleDocTemplate(output_pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()

    # Custom style for responses
    response_style = ParagraphStyle(
        name='ResponseStyle',
        parent=styles['BodyText'],
        fontSize=10,
        spaceAfter=12
    )

    content = []
    for index, response in enumerate(responses, start=1):
        # Add the response number and content
        heading = Paragraph(f"<b>File {index}:</b>", styles['Heading2'])
        response_text = Paragraph(response.replace("\n", "<br/>"), response_style)

        content.append(heading)
        content.append(Spacer(1, 6))   # Space between heading and response
        content.append(response_text)
        content.append(Spacer(1, 18))  # Space between responses

    document.build(content)
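
# For context on the markup used above: reportlab Paragraphs accept a small
# XML-like tag set, which is why newlines are replaced with <br/> and the
# heading is wrapped in <b>...</b>; e.g. Paragraph("Q: ...<br/>A: ...",
# response_style) renders as two lines in the output PDF.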

# Gradio interface function
def process_files(audio_files, pdf_file):
    # With type="filepath", Gradio passes plain path strings, so the PDF can be
    # read directly; extract its questions once rather than per audio file
    pdf_text, pdf_questions = extract_text_from_pdf(pdf_file)

    responses = []
    for audio_file in audio_files:
        # Transcribe audio
        transcribed_text = transcribe_audio(audio_file)
        # Generate form data for this recording
        form_data = generate_form_data(transcribed_text, pdf_questions)
        responses.append(form_data)

    # Save all responses to a single PDF
    output_pdf_path = "output.pdf"
    save_responses_to_pdf(responses, output_pdf_path)

    return output_pdf_path

# Gradio interface definition
interface = gr.Interface(
    fn=process_files,
    inputs=[
        gr.Files(label="Upload Audio Files", type="filepath"),
        gr.File(label="Upload PDF File", type="filepath")
    ],
    outputs=gr.File(label="Download Output PDF")
)

# Launch the interface
interface.launch()
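
To exercise the pieces without the Gradio UI, a minimal sketch (the file names
interview.wav and form.pdf are placeholders for local test files, not assets in
this commit):

    transcript = transcribe_audio("interview.wav")
    _, questions = extract_text_from_pdf("form.pdf")
    answers = generate_form_data(transcript, questions)
    save_responses_to_pdf([answers], "answers.pdf")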