MediPlusPlus committed on
Commit
836e26d
·
verified ·
1 Parent(s): 5b02197

update text input

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -226,17 +226,17 @@ def predict_category(que, input_image):
226
  return preds[0]
227
 
228
 
229
- def combine(audio, input_image):
230
- que = transcribe_audio(audio)
231
- # que = "What is the animal here?"
 
 
232
 
233
  image = Image.fromarray(input_image).convert('RGB')
234
  category = predict_category(que, image)
235
 
236
  answer = predict_answer(0, que, image)
237
 
238
- # print(category)
239
-
240
  tts = gTTS(answer)
241
  tts.save('answer.mp3')
242
  return que, answer, 'answer.mp3'
@@ -244,7 +244,14 @@ def combine(audio, input_image):
244
 
245
 
246
  # Define the Gradio interface for recording audio and displaying the transcription
247
- model_interface = gr.Interface(fn=combine, inputs=[gr.Microphone(label="Ask your question"),gr.Image(label="Upload the image")], outputs=[gr.Text(label="Transcribed Question"), gr.Text(label="Answer"), gr.Audio(label="Audio Answer")])
 
 
 
 
 
 
 
248
  # image_upload_interface = gr.Interface(fn=upload_image, inputs=gr.Image(label="Upload the image"), outputs="text")
249
 
250
  # Launch the Gradio interface
 
226
  return preds[0]
227
 
228
 
229
+ def combine(question, audio, input_image):
230
+ if question:
231
+ que = question
232
+ else:
233
+ que = transcribe_audio(audio)
234
 
235
  image = Image.fromarray(input_image).convert('RGB')
236
  category = predict_category(que, image)
237
 
238
  answer = predict_answer(0, que, image)
239
 
 
 
240
  tts = gTTS(answer)
241
  tts.save('answer.mp3')
242
  return que, answer, 'answer.mp3'
 
244
 
245
 
246
  # Define the Gradio interface for recording audio and displaying the transcription
247
+ model_interface = gr.Interface(fn=combine,
248
+ inputs=[gr.TextInput(label="Text Question"),
249
+ gr.Microphone(label="Audio Question"),
250
+ gr.Image(label="Upload the image")],
251
+ outputs=[gr.Text(label="Transcribed Question"),
252
+ gr.Text(label="Answer"),
253
+ gr.Audio(label="Audio Answer")])
254
+
255
  # image_upload_interface = gr.Interface(fn=upload_image, inputs=gr.Image(label="Upload the image"), outputs="text")
256
 
257
  # Launch the Gradio interface