Tapanat committed
Commit 46f1785 · 1 Parent(s): 9a38909

Update app.py

Files changed (1)
  1. app.py +30 -22
app.py CHANGED
@@ -1,29 +1,37 @@
- pip install --upgrade pip
- python -m venv venv_name # Create a virtual environment (if not already created)
- source venv_name/bin/activate # Activate the virtual environment (Linux/macOS)
- pip install --upgrade pip
-
-
- import streamlit as st
- from transformers import BartForConditionalGeneration, BartTokenizer
-
- # Load the pre-trained BART model and tokenizer
- model_name = "csebuetnlp/mT5_multilingual_XLSum"
- model = BartForConditionalGeneration.from_pretrained(model_name)
- tokenizer = BartTokenizer.from_pretrained(model_name)
-
- st.title("Text Summarization App")
-
- # Input text area for user input
- input_text = st.text_area("Enter text to summarize:")
-
- if st.button("Summarize"):
-     if input_text:
-         # Tokenize and summarize the input text
-         inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True)
-         summary_ids = model.generate(inputs["input_ids"], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
-         summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
-         st.subheader("Summary:")
-         st.write(summary)
-
- st.write("Powered by Hugging Face's BART model.")
+ import streamlit as st
+ from transformers import CLIPProcessor, CLIPModel
+ from PIL import Image
+ import torch
+
+ # Load the pre-trained CLIP model and processor
+ model_name = "facebook/nougat-base"
+ model = CLIPModel.from_pretrained(model_name)
+ processor = CLIPProcessor.from_pretrained(model_name)
+
+ st.title("Image to Text Conversion App")
+
+ # Input image upload
+ image = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
+
+ if image:
+     # Display the uploaded image
+     st.image(image, caption="Uploaded Image", use_column_width=True)
+
+     # Process the image for text conversion
+     with torch.no_grad():
+         inputs = processor(text="a photo of " + st.session_state["alt_text"], images=image, return_tensors="pt")
+         outputs = model(**inputs)
+
+     # Extract the textual description
+     text_description = processor.decode(outputs["text"])
+
+     # Display the text description
+     st.subheader("Text Description:")
+     st.write(text_description)
+
+ # Input for alternative text
+ alt_text = st.text_area("Provide alternative text for the image:", key="alt_text")
+
+ st.write("Powered by Hugging Face's CLIP model.")
+
+ streamlit run app.py
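
As committed, the new image-to-text block cannot run end to end: CLIPModel.from_pretrained is pointed at "facebook/nougat-base", a Nougat document-OCR checkpoint rather than a CLIP one; the raw uploaded file has to be opened as a PIL image before the processor can handle it; and the model output has no "text" field to decode, because CLIP scores image-text similarity instead of generating captions. The lines below are a minimal sketch of one way the same Streamlit flow could work with CLIP, assuming the openai/clip-vit-base-patch32 checkpoint and a hand-picked list of candidate descriptions (neither appears in the commit); the app reports the best-matching description instead of a generated caption.

    # Sketch only (not part of the commit): assumes openai/clip-vit-base-patch32
    # and a hypothetical candidate list. CLIP ranks candidate texts against the
    # image; it does not generate free-form captions.
    import streamlit as st
    import torch
    from PIL import Image
    from transformers import CLIPProcessor, CLIPModel

    model_name = "openai/clip-vit-base-patch32"  # assumed CLIP checkpoint
    model = CLIPModel.from_pretrained(model_name)
    processor = CLIPProcessor.from_pretrained(model_name)

    st.title("Image to Text Conversion App")

    uploaded = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
    alt_text = st.text_area("Provide alternative text for the image:")

    if uploaded:
        # file_uploader returns a file-like object; open it as a PIL image
        image = Image.open(uploaded).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Candidate descriptions to rank against the image (hypothetical examples)
        candidates = [
            "a photo of a person",
            "a photo of an animal",
            "a photo of a landscape",
            "a photo of text or a document",
        ]
        if alt_text:
            candidates.append("a photo of " + alt_text)

        with torch.no_grad():
            inputs = processor(text=candidates, images=image, return_tensors="pt", padding=True)
            outputs = model(**inputs)
            probs = outputs.logits_per_image.softmax(dim=-1)[0]  # one score per candidate

        best = int(probs.argmax())
        st.subheader("Text Description:")
        st.write(f"{candidates[best]} (score: {probs[best].item():.2f})")

    st.write("Powered by Hugging Face's CLIP model.")

Note that the final added line, streamlit run app.py, is a shell command rather than Python; left inside app.py it causes a SyntaxError, and the app is normally launched by running that command from a terminal.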