Spaces:

virendravaishnav
/

po-fetch-detail

Sleeping

po-fetch-detail / app.py

Updated with OCR model and Gradio integration

a02d815 about 1 year ago

1.22 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoProcessor, AutoModel
	import torch

	repo_id = "OpenGVLab/InternVL2-1B"

	# Resolve the target device before loading so the weight precision can be
	# chosen to match it: half precision is only used on CUDA, because float16
	# inference on CPU is poorly supported and may fail or be very slow.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	model_dtype = torch.float16 if device.type == "cuda" else torch.float32

	# Load the tokenizer, processor, and model directly from the Hub.
	# trust_remote_code=True executes Python code shipped in the model repo.
	tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
	processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
	model = AutoModel.from_pretrained(
	    repo_id, trust_remote_code=True, torch_dtype=model_dtype
	)

	# Move model to the selected device
	model.to(device)

	def analyze_image(image):
	    """Generate a text description for an uploaded image.

	    Args:
	        image: PIL image supplied by the Gradio image input, or ``None``
	            when the form is submitted without an upload.

	    Returns:
	        The decoded model output as a string, or a human-readable message
	        when no image was provided or when inference raised an exception.
	    """
	    # Without this guard a missing upload surfaces as an opaque
	    # "'NoneType' object has no attribute 'convert'" error message.
	    if image is None:
	        return "Please upload an image before submitting."

	    try:
	        img = image.convert("RGB")
	        encoded = processor(
	            images=img, text="describe this image", return_tensors="pt"
	        )

	        # Move every tensor to the model's device. Floating-point tensors
	        # are also cast to the model's dtype so float32 processor output
	        # does not clash with half-precision weights; integer tensors
	        # (token ids, masks) keep their dtype.
	        inputs = {}
	        for key, value in encoded.items():
	            if not torch.is_tensor(value):
	                inputs[key] = value
	            elif torch.is_floating_point(value):
	                inputs[key] = value.to(device=device, dtype=model.dtype)
	            else:
	                inputs[key] = value.to(device)

	        # NOTE(review): confirm that this checkpoint's remote code supports
	        # the processor + generate(**inputs) path used here.
	        outputs = model.generate(**inputs)
	        return tokenizer.decode(outputs[0], skip_special_tokens=True)
	    except Exception as e:
	        # UI boundary: report the failure as text instead of crashing the app.
	        return f"An error occurred: {e}"

	# Gradio UI: one PIL image in, plain text out, served by analyze_image.
	demo = gr.Interface(
	    analyze_image,
	    gr.Image(type="pil"),
	    "text",
	    title="Image Description using InternVL2-1B",
	    description="Upload an image and get a description generated by the InternVL2-1B model.",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()