Spaces:
Sleeping
Sleeping
Umar Majeed
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -34,15 +34,15 @@ flan_t5_tokenizer = T5Tokenizer.from_pretrained(flan_t5_model_id)
|
|
| 34 |
flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
|
| 35 |
|
| 36 |
# Function to transcribe audio files
|
| 37 |
-
def transcribe_audio(
|
| 38 |
-
result = whisper_pipe(
|
| 39 |
return result['text']
|
| 40 |
|
| 41 |
# Function to extract text and questions from PDF
|
| 42 |
-
def extract_text_from_pdf(
|
| 43 |
text = ""
|
| 44 |
questions = []
|
| 45 |
-
with pdfplumber.open(
|
| 46 |
for page in pdf.pages:
|
| 47 |
page_text = page.extract_text()
|
| 48 |
if page_text:
|
|
@@ -113,19 +113,19 @@ def save_responses_to_pdf(responses, output_pdf_path):
|
|
| 113 |
# Gradio interface function
|
| 114 |
def process_files(audio_files, pdf_file):
|
| 115 |
responses = []
|
| 116 |
-
for
|
| 117 |
# Transcribe audio
|
| 118 |
transcribed_text = transcribe_audio(audio_file.name)
|
| 119 |
# Extract text and form fields from PDF
|
| 120 |
pdf_text, pdf_questions = extract_text_from_pdf(pdf_file.name)
|
| 121 |
# Generate form data
|
| 122 |
form_data = generate_form_data(transcribed_text, pdf_questions)
|
| 123 |
-
responses.append(
|
| 124 |
-
|
| 125 |
# Save all responses to a PDF
|
| 126 |
-
output_pdf_path = "
|
| 127 |
save_responses_to_pdf(responses, output_pdf_path)
|
| 128 |
-
|
| 129 |
# Return the PDF path and the generated responses
|
| 130 |
return output_pdf_path, "\n\n".join(responses)
|
| 131 |
|
|
@@ -133,8 +133,8 @@ def process_files(audio_files, pdf_file):
|
|
| 133 |
interface = gr.Interface(
|
| 134 |
fn=process_files,
|
| 135 |
inputs=[
|
| 136 |
-
gr.Files(label="Upload Audio Dataset"
|
| 137 |
-
gr.File(label="Upload PDF File with Questions"
|
| 138 |
],
|
| 139 |
outputs=[
|
| 140 |
gr.File(label="Download Output PDF"),
|
|
|
|
| 34 |
flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
|
| 35 |
|
| 36 |
# Function to transcribe audio files
|
| 37 |
+
def transcribe_audio(file):
|
| 38 |
+
result = whisper_pipe(file)
|
| 39 |
return result['text']
|
| 40 |
|
| 41 |
# Function to extract text and questions from PDF
|
| 42 |
+
def extract_text_from_pdf(pdf_file):
|
| 43 |
text = ""
|
| 44 |
questions = []
|
| 45 |
+
with pdfplumber.open(pdf_file) as pdf:
|
| 46 |
for page in pdf.pages:
|
| 47 |
page_text = page.extract_text()
|
| 48 |
if page_text:
|
|
|
|
| 113 |
# Gradio interface function
|
| 114 |
def process_files(audio_files, pdf_file):
|
| 115 |
responses = []
|
| 116 |
+
for audio_file in audio_files:
|
| 117 |
# Transcribe audio
|
| 118 |
transcribed_text = transcribe_audio(audio_file.name)
|
| 119 |
# Extract text and form fields from PDF
|
| 120 |
pdf_text, pdf_questions = extract_text_from_pdf(pdf_file.name)
|
| 121 |
# Generate form data
|
| 122 |
form_data = generate_form_data(transcribed_text, pdf_questions)
|
| 123 |
+
responses.append(form_data)
|
| 124 |
+
|
| 125 |
# Save all responses to a PDF
|
| 126 |
+
output_pdf_path = "output.pdf"
|
| 127 |
save_responses_to_pdf(responses, output_pdf_path)
|
| 128 |
+
|
| 129 |
# Return the PDF path and the generated responses
|
| 130 |
return output_pdf_path, "\n\n".join(responses)
|
| 131 |
|
|
|
|
| 133 |
interface = gr.Interface(
|
| 134 |
fn=process_files,
|
| 135 |
inputs=[
|
| 136 |
+
gr.Files(label="Upload Audio Dataset"),
|
| 137 |
+
gr.File(label="Upload PDF File with Questions")
|
| 138 |
],
|
| 139 |
outputs=[
|
| 140 |
gr.File(label="Download Output PDF"),
|