Spaces:
Build error
import streamlit as st
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch

# Load the pre-trained CLIP model and processor
model_name = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(model_name)
processor = CLIPProcessor.from_pretrained(model_name)

st.title("Image to Text Conversion App")

# Input image upload
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

# Input for candidate descriptions of the image (comma separated)
alt_text = st.text_area("Provide alternative text for the image (comma separated):", key="alt_text")

if uploaded_file and alt_text:
    # Display the uploaded image
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # CLIP does not generate captions; it scores image-text pairs,
    # so compare the image against each candidate description
    candidates = ["a photo of " + t.strip() for t in alt_text.split(",") if t.strip()]

    # Process the image and candidate texts and score them with CLIP
    with torch.no_grad():
        inputs = processor(text=candidates, images=image, return_tensors="pt", padding=True)
        outputs = model(**inputs)
        probs = outputs.logits_per_image.softmax(dim=-1)[0]

    # Display the best-matching description
    st.subheader("Text Description:")
    st.write(candidates[int(probs.argmax())])

st.write("Powered by Hugging Face's CLIP model.")
| streamlit run app.py | |
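On Spaces, the Streamlit SDK installs the packages listed in a requirements.txt placed next to app.py, and a missing or incomplete file is a common cause of a build error. A minimal sketch of requirements.txt covering the imports above (unpinned versions are an assumption; adjust as needed):

streamlit
transformers
torch
Pillow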