Spaces:

umarmajeedofficial
/

Fill-Up

Sleeping

App Files Files Community

umarmajeedofficial committed on Sep 1, 2024

Commit

47e5c17

verified ·

1 Parent(s): 6694a2c

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -11

app.py CHANGED Viewed

@@ -34,15 +34,15 @@ flan_t5_tokenizer = T5Tokenizer.from_pretrained(flan_t5_model_id)
 flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
 # Function to transcribe audio files
-def transcribe_audio(file_path):
-    result = whisper_pipe(file_path)
     return result['text']
 # Function to extract text and questions from PDF
-def extract_text_from_pdf(pdf_path):
     text = ""
     questions = []
-    with pdfplumber.open(pdf_path) as pdf:
         for page in pdf.pages:
             page_text = page.extract_text()
             if page_text:
@@ -113,19 +113,19 @@ def save_responses_to_pdf(responses, output_pdf_path):
 # Gradio interface function
 def process_files(audio_files, pdf_file):
     responses = []
-    for index, audio_file in enumerate(audio_files, start=1):
         # Transcribe audio
         transcribed_text = transcribe_audio(audio_file.name)
         # Extract text and form fields from PDF
         pdf_text, pdf_questions = extract_text_from_pdf(pdf_file.name)
         # Generate form data
         form_data = generate_form_data(transcribed_text, pdf_questions)
-        responses.append(f"File {index}:\n{form_data}")
     # Save all responses to a PDF
-    output_pdf_path = "/app/output.pdf"  # Update path for Spaces environment
     save_responses_to_pdf(responses, output_pdf_path)
     # Return the PDF path and the generated responses
     return output_pdf_path, "\n\n".join(responses)
@@ -133,8 +133,8 @@ def process_files(audio_files, pdf_file):
 interface = gr.Interface(
     fn=process_files,
     inputs=[
-        gr.Files(label="Upload Audio Dataset", type="filepath"),
-        gr.File(label="Upload PDF File with Questions", type="filepath")
     ],
     outputs=[
         gr.File(label="Download Output PDF"),

 flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
 # Function to transcribe audio files
+def transcribe_audio(file):
+    result = whisper_pipe(file)
     return result['text']
 # Function to extract text and questions from PDF
+def extract_text_from_pdf(pdf_file):
     text = ""
     questions = []
+    with pdfplumber.open(pdf_file) as pdf:
         for page in pdf.pages:
             page_text = page.extract_text()
             if page_text:
 # Gradio interface function
 def process_files(audio_files, pdf_file):
     responses = []
+    for audio_file in audio_files:
         # Transcribe audio
         transcribed_text = transcribe_audio(audio_file.name)
         # Extract text and form fields from PDF
         pdf_text, pdf_questions = extract_text_from_pdf(pdf_file.name)
         # Generate form data
         form_data = generate_form_data(transcribed_text, pdf_questions)
+        responses.append(form_data)
     # Save all responses to a PDF
+    output_pdf_path = "output.pdf"
     save_responses_to_pdf(responses, output_pdf_path)
     # Return the PDF path and the generated responses
     return output_pdf_path, "\n\n".join(responses)
 interface = gr.Interface(
     fn=process_files,
     inputs=[
+        gr.Files(label="Upload Audio Dataset"),
+        gr.File(label="Upload PDF File with Questions")
     ],
     outputs=[
         gr.File(label="Download Output PDF"),