import streamlit as st
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import torch


# Load model and processor
@st.cache_resource  # Cache model to avoid reloading on every rerun
def load_model():
    processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
    model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
    return processor, model

# Extract text from image using SmolVLM
def extract_text(image, processor, model):
    # SmolVLM expects a chat-style prompt that contains an image placeholder,
    # so build the prompt through the processor's chat template
    messages = [{"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "What is the text in this image? Extract all data in JSON format."},
    ]}]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    with torch.no_grad():
        # Allow enough new tokens for a full extraction; generate() defaults to a short limit
        outputs = model.generate(**inputs, max_new_tokens=512)
    result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return result

# Streamlit UI
def main():
    st.title("🖼️ OCR App using SmolVLM")
    st.write("Upload an image, and I will extract the text for you!")

    # Load the model and processor
    processor, model = load_model()

    # File uploader
    uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        # Open image
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Extract text
        with st.spinner("Extracting text..."):
            extracted_text = extract_text(image, processor, model)

        # Display result
        st.subheader("📝 Extracted Text:")
        st.write(extracted_text)


if __name__ == "__main__":
    main()
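
# Usage note (assumed setup): on a Hugging Face Space this file is typically saved as
# app.py with streamlit, transformers, torch, and pillow listed in requirements.txt;
# locally the app can then be started with `streamlit run app.py`.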