import gradio as gr import requests from PIL import Image from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor import spaces @spaces.GPU def infer_infographics(image, question): model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-ai2d-base") processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base") inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs) return processor.decode(predictions[0], skip_special_tokens=True) @spaces.GPU def infer_ui(image, question): model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-screen2words-base") processor = Pix2StructProcessor.from_pretrained("google/pix2struct-screen2words-base") inputs = processor(images=image,text=question, return_tensors="pt") predictions = model.generate(**inputs) return processor.decode(predictions[0], skip_special_tokens=True) @spaces.GPU def infer_chart(image, question): model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-chartqa-base") processor = Pix2StructProcessor.from_pretrained("google/pix2struct-chartqa-base") inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs) return processor.decode(predictions[0], skip_special_tokens=True) @spaces.GPU def infer_doc(image, question): model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-docvqa-base") processor = Pix2StructProcessor.from_pretrained("google/pix2struct-docvqa-base") inputs = processor(images=image, text=question, return_tensors="pt") predictions = model.generate(**inputs) return processor.decode(predictions[0], skip_special_tokens=True) css = """ #mkd { height: 500px; overflow: auto; border: 1px solid #ccc; } """ with gr.Blocks(css=css) as demo: gr.HTML("