Spaces: Build error
import streamlit as st
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch

# Load the pre-trained CLIP model and processor
model_name = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(model_name)
processor = CLIPProcessor.from_pretrained(model_name)

st.title("Image to Text Conversion App")

# Candidate descriptions (CLIP scores text against an image; it does not generate captions)
alt_text = st.text_area("Provide candidate descriptions for the image, one per line:",
                        "a cat\na dog\na landscape\na person")

# Input image upload
image_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if image_file:
    # Display the uploaded image
    image = Image.open(image_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Score each candidate description against the image
    candidates = ["a photo of " + line.strip() for line in alt_text.splitlines() if line.strip()]
    if candidates:
        with torch.no_grad():
            inputs = processor(text=candidates, images=image, return_tensors="pt", padding=True)
            outputs = model(**inputs)
            probs = outputs.logits_per_image.softmax(dim=-1)[0]

        # Display the best-matching description
        best = int(probs.argmax())
        st.subheader("Best-matching description:")
        st.write(f"{candidates[best]} (confidence: {probs[best].item():.1%})")

st.write("Powered by Hugging Face's CLIP model.")
Run the app locally with: streamlit run app.py
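
Regarding the build error itself: on Spaces, a Streamlit app installs whatever is listed in requirements.txt, and a missing or broken entry there is a common cause of failed builds. A minimal sketch of requirements.txt, assuming the app needs only the packages imported above:

streamlit
transformers
torch
Pillow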
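
Note that CLIP only scores how well candidate descriptions match an image; it cannot generate a caption on its own. If the goal is true image-to-text conversion, an image-captioning model is a better fit. Below is a minimal sketch, assuming the BLIP checkpoint Salesforce/blip-image-captioning-base (any captioning checkpoint with the same API would work):

import streamlit as st
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Image-captioning model: generates text, unlike CLIP which only scores it
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

st.title("Image Captioning App")
image_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if image_file:
    # Display the uploaded image
    image = Image.open(image_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Generate a caption for the image
    with torch.no_grad():
        inputs = processor(images=image, return_tensors="pt")
        output_ids = model.generate(**inputs, max_new_tokens=30)

    caption = processor.decode(output_ids[0], skip_special_tokens=True)
    st.subheader("Generated Caption:")
    st.write(caption)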