import streamlit as st
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import torch

# Load model and processor
# Cache the model so it is not reloaded on every rerun
@st.cache_resource
def load_model():
    processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
    model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
    return processor, model

# Extract text from an image using SmolVLM
def extract_text(image, processor, model):
    # Preprocess the image and the instruction prompt
    inputs = processor(
        images=image,
        text="What is the text in this image? Extract all data in JSON format.",
        return_tensors="pt",
    )
    with torch.no_grad():
        # Allow enough new tokens for the full OCR output
        outputs = model.generate(**inputs, max_new_tokens=512)
    result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return result

# Streamlit UI
def main():
    st.title("🖼️ OCR App using SmolVLM")
    st.write("Upload an image, and I will extract the text for you!")

    # Load the model and processor
    processor, model = load_model()

    # File uploader
    uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        # Open the image
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Extract text
        with st.spinner("Extracting text..."):
            extracted_text = extract_text(image, processor, model)

        # Display the result
        st.subheader("📝 Extracted Text:")
        st.write(extracted_text)

if __name__ == "__main__":
    main()
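
Depending on the transformers version, calling the processor with a plain text prompt plus an image may complain that the prompt contains no image token. The SmolVLM-Instruct model card instead builds the prompt through the processor's chat template, which inserts the image placeholder for you. A minimal sketch of that variant, reusing the processor and model from load_model() above (the helper name extract_text_chat is only for illustration):

def extract_text_chat(image, processor, model):
    # Build a chat-style prompt with an image placeholder, as in the
    # SmolVLM-Instruct model card.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": "What is the text in this image? Extract all data in JSON format."},
            ],
        }
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=512)
    return processor.batch_decode(outputs, skip_special_tokens=True)[0]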
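
To try the app locally, install the dependencies and launch Streamlit; the file name app.py is just an assumption for wherever you save the script above:

pip install streamlit transformers torch pillow
streamlit run app.py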