umarmajeedofficial committed on
Commit
47e5c17
·
verified ·
1 Parent(s): 6694a2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -34,15 +34,15 @@ flan_t5_tokenizer = T5Tokenizer.from_pretrained(flan_t5_model_id)
34
  flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
35
 
36
  # Function to transcribe audio files
37
- def transcribe_audio(file_path):
38
- result = whisper_pipe(file_path)
39
  return result['text']
40
 
41
  # Function to extract text and questions from PDF
42
- def extract_text_from_pdf(pdf_path):
43
  text = ""
44
  questions = []
45
- with pdfplumber.open(pdf_path) as pdf:
46
  for page in pdf.pages:
47
  page_text = page.extract_text()
48
  if page_text:
@@ -113,19 +113,19 @@ def save_responses_to_pdf(responses, output_pdf_path):
113
  # Gradio interface function
114
  def process_files(audio_files, pdf_file):
115
  responses = []
116
- for index, audio_file in enumerate(audio_files, start=1):
117
  # Transcribe audio
118
  transcribed_text = transcribe_audio(audio_file.name)
119
  # Extract text and form fields from PDF
120
  pdf_text, pdf_questions = extract_text_from_pdf(pdf_file.name)
121
  # Generate form data
122
  form_data = generate_form_data(transcribed_text, pdf_questions)
123
- responses.append(f"File {index}:\n{form_data}")
124
-
125
  # Save all responses to a PDF
126
- output_pdf_path = "/app/output.pdf" # Update path for Spaces environment
127
  save_responses_to_pdf(responses, output_pdf_path)
128
-
129
  # Return the PDF path and the generated responses
130
  return output_pdf_path, "\n\n".join(responses)
131
 
@@ -133,8 +133,8 @@ def process_files(audio_files, pdf_file):
133
  interface = gr.Interface(
134
  fn=process_files,
135
  inputs=[
136
- gr.Files(label="Upload Audio Dataset", type="filepath"),
137
- gr.File(label="Upload PDF File with Questions", type="filepath")
138
  ],
139
  outputs=[
140
  gr.File(label="Download Output PDF"),
 
34
  flan_t5_model = T5ForConditionalGeneration.from_pretrained(flan_t5_model_id)
35
 
36
  # Function to transcribe audio files
37
+ def transcribe_audio(file):
38
+ result = whisper_pipe(file)
39
  return result['text']
40
 
41
  # Function to extract text and questions from PDF
42
+ def extract_text_from_pdf(pdf_file):
43
  text = ""
44
  questions = []
45
+ with pdfplumber.open(pdf_file) as pdf:
46
  for page in pdf.pages:
47
  page_text = page.extract_text()
48
  if page_text:
 
113
  # Gradio interface function
114
  def process_files(audio_files, pdf_file):
115
  responses = []
116
+ for audio_file in audio_files:
117
  # Transcribe audio
118
  transcribed_text = transcribe_audio(audio_file.name)
119
  # Extract text and form fields from PDF
120
  pdf_text, pdf_questions = extract_text_from_pdf(pdf_file.name)
121
  # Generate form data
122
  form_data = generate_form_data(transcribed_text, pdf_questions)
123
+ responses.append(form_data)
124
+
125
  # Save all responses to a PDF
126
+ output_pdf_path = "output.pdf"
127
  save_responses_to_pdf(responses, output_pdf_path)
128
+
129
  # Return the PDF path and the generated responses
130
  return output_pdf_path, "\n\n".join(responses)
131
 
 
133
  interface = gr.Interface(
134
  fn=process_files,
135
  inputs=[
136
+ gr.Files(label="Upload Audio Dataset"),
137
+ gr.File(label="Upload PDF File with Questions")
138
  ],
139
  outputs=[
140
  gr.File(label="Download Output PDF"),