Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,15 +34,15 @@ flan_t5_tokenizer = T5Tokenizer.from_pretrained(flan_t5_model_id)
|
|
34 |
flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
|
35 |
|
36 |
# Function to transcribe audio files
|
37 |
-
def transcribe_audio(
|
38 |
-
result = whisper_pipe(
|
39 |
return result['text']
|
40 |
|
41 |
# Function to extract text and questions from PDF
|
42 |
-
def extract_text_from_pdf(
|
43 |
text = ""
|
44 |
questions = []
|
45 |
-
with pdfplumber.open(
|
46 |
for page in pdf.pages:
|
47 |
page_text = page.extract_text()
|
48 |
if page_text:
|
@@ -113,19 +113,19 @@ def save_responses_to_pdf(responses, output_pdf_path):
|
|
113 |
# Gradio interface function
|
114 |
def process_files(audio_files, pdf_file):
|
115 |
responses = []
|
116 |
-
for
|
117 |
# Transcribe audio
|
118 |
transcribed_text = transcribe_audio(audio_file.name)
|
119 |
# Extract text and form fields from PDF
|
120 |
pdf_text, pdf_questions = extract_text_from_pdf(pdf_file.name)
|
121 |
# Generate form data
|
122 |
form_data = generate_form_data(transcribed_text, pdf_questions)
|
123 |
-
responses.append(
|
124 |
-
|
125 |
# Save all responses to a PDF
|
126 |
-
output_pdf_path = "
|
127 |
save_responses_to_pdf(responses, output_pdf_path)
|
128 |
-
|
129 |
# Return the PDF path and the generated responses
|
130 |
return output_pdf_path, "\n\n".join(responses)
|
131 |
|
@@ -133,8 +133,8 @@ def process_files(audio_files, pdf_file):
|
|
133 |
interface = gr.Interface(
|
134 |
fn=process_files,
|
135 |
inputs=[
|
136 |
-
gr.Files(label="Upload Audio Dataset"
|
137 |
-
gr.File(label="Upload PDF File with Questions"
|
138 |
],
|
139 |
outputs=[
|
140 |
gr.File(label="Download Output PDF"),
|
|
|
34 |
flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
|
35 |
|
36 |
# Function to transcribe audio files
|
37 |
+
def transcribe_audio(file):
    """Run ``whisper_pipe`` on *file* and return the ``'text'`` entry of its result.

    Args:
        file: Whatever input ``whisper_pipe`` accepts; callers in this file
            pass the ``.name`` of an uploaded audio file (presumably a
            filesystem path -- confirm against the Gradio version in use).

    Returns:
        The value stored under the ``'text'`` key of the pipeline output.
    """
    transcription = whisper_pipe(file)
    return transcription['text']
|
40 |
|
41 |
# Function to extract text and questions from PDF
|
42 |
+
def extract_text_from_pdf(pdf_file):
|
43 |
text = ""
|
44 |
questions = []
|
45 |
+
with pdfplumber.open(pdf_file) as pdf:
|
46 |
for page in pdf.pages:
|
47 |
page_text = page.extract_text()
|
48 |
if page_text:
|
|
|
113 |
# Gradio interface function
|
114 |
def process_files(audio_files, pdf_file):
    """Answer the PDF's questions once per uploaded audio file and save the results.

    Each audio file is transcribed with ``transcribe_audio``; the transcript,
    together with the questions extracted from ``pdf_file``, is passed to
    ``generate_form_data``. All generated responses are written to
    ``output.pdf`` through ``save_responses_to_pdf``.

    Args:
        audio_files: Iterable of uploaded-file objects; each item's ``.name``
            is handed to ``transcribe_audio``.
        pdf_file: Uploaded-file object whose ``.name`` is handed to
            ``extract_text_from_pdf``.

    Returns:
        A ``(output_pdf_path, text)`` tuple, where ``text`` is every response
        joined by a blank line.
    """
    # The questions PDF is the same for every recording, so parse it once
    # up front instead of re-opening it on each loop iteration. The raw
    # page text is not used here, only the extracted questions.
    _, pdf_questions = extract_text_from_pdf(pdf_file.name)

    # One response per audio file: transcribe it, then generate form data.
    responses = [
        generate_form_data(transcribe_audio(audio_file.name), pdf_questions)
        for audio_file in audio_files
    ]

    # Save all responses to a PDF
    output_pdf_path = "output.pdf"
    save_responses_to_pdf(responses, output_pdf_path)

    # Return the PDF path and the generated responses.
    # NOTE(review): str.join requires generate_form_data to return str -- confirm.
    return output_pdf_path, "\n\n".join(responses)
|
131 |
|
|
|
133 |
interface = gr.Interface(
|
134 |
fn=process_files,
|
135 |
inputs=[
|
136 |
+
gr.Files(label="Upload Audio Dataset"),
|
137 |
+
gr.File(label="Upload PDF File with Questions")
|
138 |
],
|
139 |
outputs=[
|
140 |
gr.File(label="Download Output PDF"),
|