Steven-GU-Yu-Di committed on
Commit
7f1b559
·
verified ·
1 Parent(s): aac1866

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit demo: text classification, visual question answering, and speech.

The user uploads an image, types a free-text description and a question about
the image. The app classifies the description, answers the question against
the image, and reads the answer aloud.
"""
from PIL import Image
import streamlit as st
from transformers import pipeline


@st.cache_resource
def load_models():
    """Build the three inference pipelines once and reuse them across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, each click would reload every model from scratch.

    Returns:
        A ``(text_classifier, vqa, tts)`` tuple of Transformers pipelines.
        Each uses the library's default checkpoint for its task, as the
        original text-classification pipeline already did.
    """
    text_classifier = pipeline("text-classification")
    # The VQA pipeline takes an image plus a question. The previous code loaded
    # a text-only SQuAD checkpoint through a class that transformers does not
    # export, so the uploaded image was never used.
    vqa = pipeline("visual-question-answering")
    # Text-to-speech needs a speech *synthesis* model; wav2vec2 is a speech
    # *recognition* model and cannot generate audio.
    tts = pipeline("text-to-speech")
    return text_classifier, vqa, tts


classifier, vqa_model, tts_model = load_models()

# Create a Streamlit app
st.title("Image, Text, and Speech Classification")

# Sidebar for user inputs
st.sidebar.title("Input")
uploaded_image = st.sidebar.file_uploader("Upload Image", type=["jpg", "jpeg", "png"])
text_input = st.sidebar.text_input("Enter Text Description")
question_input = st.sidebar.text_input("Enter Question for Image")


def classify(image, text, question):
    """Run the text, VQA and speech steps and render their results.

    Args:
        image: The uploaded file object from ``st.file_uploader`` (or ``None``
            when nothing was uploaded).
        text: Free-text description to classify.
        question: Question to ask about the uploaded image. When empty, only
            the text classification step runs.

    Returns:
        None. All output is written to the Streamlit page.
    """
    if image is None or not text:
        # Tell the user why nothing happened instead of silently returning.
        st.warning("Please upload an image and enter a text description.")
        return

    # Normalise to RGB so PNGs with an alpha channel or palette are accepted
    # by the vision model's preprocessing.
    picture = Image.open(image).convert("RGB")
    st.image(picture, caption="Uploaded Image", use_column_width=True)
    st.write("Text Description:", text)
    st.write("Question for Image:", question)

    # Text classification
    text_result = classifier(text)
    st.write("Text Classification Result:")
    st.write(text_result)

    if not question:
        st.info("Enter a question to run visual question answering.")
        return

    # Visual Question Answering: the answer must come from the image itself,
    # not from the classifier label that was previously passed as "context".
    vqa_output = vqa_model(image=picture, question=question, top_k=1)
    st.write("Visual Question Answering Result:")
    st.write(vqa_output)

    # Text-to-Speech: the VQA pipeline returns a ranked list of candidates;
    # speak the top one.
    answer = vqa_output[0]["answer"]
    speech = tts_model(answer)
    # The pipeline yields a raw waveform array, so the sample rate has to be
    # passed explicitly for Streamlit to encode it.
    st.audio(speech["audio"], sample_rate=speech["sampling_rate"])


# Button to trigger classification
if st.sidebar.button("Classify"):
    classify(uploaded_image, text_input, question_input)