# Hugging Face Space status banner: "Running on Zero".
import gradio as gr
import spaces  # NOTE(review): kept ahead of torch; ZeroGPU expects `spaces` to be imported before CUDA is touched — confirm
import torch
from PIL import Image

from cumo.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
from cumo.mm_utils import process_images, tokenizer_image_token
from cumo.model.builder import load_pretrained_model
# Checkpoint location and loader options handed to load_pretrained_model().
model_path = "BenkHel/CumoThesis"
model_base = None
model_name = "CumoThesis"  # or "BenkHel/CumoThesis"
load_8bit = False
load_4bit = False

# Prefer the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Loaded once at import time so every request reuses the same objects.
# NOTE(review): `device` is passed as the sixth positional argument; confirm
# that this position is the loader's `device` parameter and not another one
# (e.g. a `device_map`) — the loader's signature is not visible from here.
tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path, model_base, model_name, load_8bit, load_4bit, device, use_flash_attn=False
)

# Fixed question asked about every uploaded image; the image placeholder token
# is prepended so tokenizer_image_token() can locate where the image goes.
PROMPT = "What material is this item and how is it disposed of?"
PROMPT_WITH_IMAGE = f"{DEFAULT_IMAGE_TOKEN} {PROMPT}"
# Request a GPU for the duration of each call when the app runs on ZeroGPU
# hardware; per the `spaces` documentation the decorator is a no-op elsewhere.
@spaces.GPU
def classify_image(image):
    """Ask the model the fixed waste-disposal question about one image.

    Args:
        image: A ``PIL.Image.Image``, anything ``Image.fromarray`` accepts,
            or ``None`` when nothing was uploaded.

    Returns:
        The decoded model answer as a string, with the echoed prompt (if any)
        and surrounding whitespace removed, or a short instruction message
        when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload an image."
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # Uploads may be RGBA, palette or grayscale; normalise to three channels
    # before handing the image to the model's preprocessing.
    image = image.convert("RGB")

    images = process_images([image], image_processor, model.config)
    images = [img.to(device, dtype=torch.float16) for img in images]

    # Add a batch dimension: generate() is called with a single prompt.
    input_ids = (
        tokenizer_image_token(
            PROMPT_WITH_IMAGE, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
        )
        .unsqueeze(0)
        .to(device)
    )

    with torch.no_grad():
        outputs = model.generate(
            inputs=input_ids,
            max_new_tokens=128,
            pad_token_id=tokenizer.eos_token_id,
            images=images,
        )

    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    # Drop the question if the decoded text echoes it, and strip the result in
    # both cases so the caller never sees stray leading/trailing whitespace.
    if output_text.startswith(PROMPT):
        output_text = output_text[len(PROMPT):].strip()
    return output_text
# Single-input, single-output Gradio UI: an uploaded image goes to
# classify_image() and the returned text is shown in a textbox.
iface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil", label="Upload an image of a waste item"),
    outputs=gr.Textbox(label="Classification & Disposal Recommendation"),
    title="CuMo Waste Classifier",
    description="Upload a photo of a household waste item. The model will classify the material and recommend how to dispose of it.",
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()