Spaces:

theaiinstitute
/

theia

Running on Zero

File size: 1,756 Bytes

e35c029
b194803
e35c029
c62ab28
e35c029
 
 
 
0d4e389
e35c029
c970642
b194803
e35c029
 
 
 
 
 
 
6d5e85a
e35c029
b033d86
e35c029
b033d86
8232dd0
f7c781f
e35c029
 
 
 
 
 
 
 
 
 
 
 
 
b033d86
e35c029
 
4342a51
 
e35c029
cb45d23
e35c029
47c37ba
e35c029

import gradio as gr
import spaces
import torch
import torchvision.transforms
import numpy as np
from transformers import AutoModel
from theia.decoding import load_feature_stats, prepare_depth_decoder, prepare_mask_generator, decode_everything

@spaces.GPU()
def run_theia(image):
    """Decode Theia's features for one image and stack them over the reference decodings.

    On every call this loads the "theaiinstitute/theia-tiny-patch16-224-cddsv"
    model, the feature statistics for the listed target models, a mask
    generator and a depth decoder, all on CUDA. It then resizes the input to
    224x224 and runs ``decode_everything`` with ``gt=True``.

    Args:
        image: The input image. The Gradio interface in this file supplies it
            as a PIL image (``gr.Image(type="pil")``); it is ``None`` when the
            user submits without providing an image.

    Returns:
        A ``numpy.uint8`` array: the first Theia decoding result stacked
        vertically on top of the first ground-truth decoding result.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Gradio hands over None when the form is submitted empty; fail with a
    # readable message instead of an opaque error from the resize transform.
    if image is None:
        raise gr.Error("Please provide an input image before submitting.")

    theia_model = AutoModel.from_pretrained("theaiinstitute/theia-tiny-patch16-224-cddsv", trust_remote_code=True)
    theia_model = theia_model.to('cuda')
    target_model_names = [
        "google/vit-huge-patch14-224-in21k",
        "facebook/dinov2-large",
        "openai/clip-vit-large-patch14",
        "facebook/sam-vit-huge",
        "LiheYoung/depth-anything-large-hf",
    ]
    # Per-target-model feature statistics, read from the local "feature_stats" directory.
    feature_means, feature_vars = load_feature_stats(target_model_names, stat_file_root="feature_stats")

    mask_generator, sam_model = prepare_mask_generator('cuda')
    depth_anything_model_name = "LiheYoung/depth-anything-large-hf"
    # Only the first element returned by prepare_depth_decoder is used here.
    depth_anything_decoder, _ = prepare_depth_decoder(depth_anything_model_name, 'cuda')

    # The pipeline is fed a fixed 224x224 input; decode_everything takes a list of images.
    image = torchvision.transforms.Resize(size=(224, 224))(image)
    images = [image]

    theia_decode_results, gt_decode_results = decode_everything(
        theia_model=theia_model,
        feature_means=feature_means,
        feature_vars=feature_vars,
        images=images,
        mask_generator=mask_generator,
        sam_model=sam_model,
        depth_anything_decoder=depth_anything_decoder,
        pred_iou_thresh=0.5,
        stability_score_thresh=0.7,
        gt=True,
        device='cuda',
    )

    # Single input image, so take index 0 of each result list: Theia output on
    # top, ground-truth output below.
    vis = np.vstack([theia_decode_results[0], gt_decode_results[0]])
    # NOTE(review): presumably the decoded visualizations are floats in [0, 1];
    # confirm against theia.decoding before relying on this scaling.
    vis = (255.0 * vis).astype(np.uint8)

    return vis

# Web UI: one uploaded image (delivered to run_theia as a PIL image) in,
# one image (returned by run_theia as a numpy array) out.
demo = gr.Interface(
    fn=run_theia,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="numpy"),
)

# Start serving the interface.
demo.launch()