File size: 1,165 Bytes
46f1785
 
 
 
eb58d9d
46f1785
 
 
 
65d2683
46f1785
 
 
 
dca1a64
46f1785
 
 
dca1a64
46f1785
 
 
 
 
 
 
 
 
 
 
dca1a64
46f1785
 
dca1a64
46f1785
140e3c2
46f1785
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import streamlit as st
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch

# Streamlit app: upload an image, type one or more candidate descriptions, and
# let CLIP pick the description that best matches the image.
#
# To launch the app, run from a shell:
#     streamlit run app.py

# Load the pre-trained CLIP model and processor.
# CLIPModel / CLIPProcessor can only load CLIP checkpoints; the previous value
# ("facebook/nougat-base") is a Nougat checkpoint and cannot be loaded by them.
model_name = "openai/clip-vit-base-patch32"

# Prefix put in front of every candidate description before it is scored.
PROMPT_PREFIX = "a photo of "


@st.cache_resource
def _load_clip(checkpoint: str):
    """Return ``(model, processor)`` for *checkpoint*, loading them only once.

    Streamlit re-executes the whole script on every widget interaction;
    without caching the model weights would be reloaded on each rerun.
    """
    clip_model = CLIPModel.from_pretrained(checkpoint)
    clip_processor = CLIPProcessor.from_pretrained(checkpoint)
    return clip_model, clip_processor


def _parse_candidates(raw_text) -> list:
    """Return the stripped, non-empty lines of *raw_text* (``None`` -> ``[]``)."""
    return [line.strip() for line in (raw_text or "").splitlines() if line.strip()]


def _rank_candidates(pil_image, candidates) -> list:
    """Score each candidate against the image and return them best-first.

    Returns a list of ``(prompt, probability)`` pairs, where ``prompt`` is the
    candidate with ``PROMPT_PREFIX`` applied and the probabilities are a
    softmax over all candidates (so they sum to 1).
    """
    prompts = [PROMPT_PREFIX + candidate for candidate in candidates]
    # padding: candidates have different token lengths.
    # truncation: keeps long text within CLIP's fixed text context length.
    inputs = processor(
        text=prompts,
        images=pil_image,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    with torch.no_grad():
        outputs = model(**inputs)
    # logits_per_image has one row per image and one column per text prompt.
    probabilities = outputs.logits_per_image.softmax(dim=-1)[0].tolist()
    return sorted(zip(prompts, probabilities), key=lambda pair: pair[1], reverse=True)


model, processor = _load_clip(model_name)

st.title("Image to Text Conversion App")

# Input image upload
image = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if image:
    # Display the uploaded image
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # The text area is rendered further down the page, so read its current
    # value from session state; .get() avoids a KeyError when it is not set yet.
    candidates = _parse_candidates(st.session_state.get("alt_text", ""))

    if candidates:
        # The processor needs a decoded image, not the raw uploaded file object.
        pil_image = Image.open(image).convert("RGB")
        ranked = _rank_candidates(pil_image, candidates)
        best_prompt, _ = ranked[0]

        # Display the text description
        st.subheader("Text Description:")
        st.write(best_prompt)

        # With several candidates, also show how confident CLIP is in each.
        if len(ranked) > 1:
            for prompt, probability in ranked:
                st.write(f"{probability:.1%}: {prompt}")
    else:
        # CLIP scores image/text pairs; it cannot invent a caption on its own.
        st.info(
            "Enter one or more candidate descriptions below (one per line) "
            "and CLIP will pick the one that best matches the image."
        )

# Input for alternative text (each non-empty line is one candidate description)
alt_text = st.text_area("Provide alternative text for the image:", key="alt_text")

st.write("Powered by Hugging Face's CLIP model.")