# from .demo_modelpart import InferenceDemo
import gradio as gr
import os
from threading import Thread
# import time
import cv2
import datetime
# import copy
import torch
import spaces  # Hugging Face ZeroGPU helper (this Space runs on Zero)
import numpy as np
from llava.constants import (
    IMAGE_TOKEN_INDEX,
    DEFAULT_IMAGE_TOKEN,
)
from llava.conversation import conv_templates, SeparatorStyle
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from llava.mm_utils import (
    tokenizer_image_token,
    get_model_name_from_path,
    KeywordsStoppingCriteria,
)
from serve_constants import html_header, bibtext, learn_more_markdown, tos_markdown
from decord import VideoReader, cpu
import requests
from PIL import Image
import io
from io import BytesIO
from transformers import TextStreamer, TextIteratorStreamer
import hashlib
import PIL
import base64
import json
import gradio_client
import subprocess
import sys
from huggingface_hub import HfApi, login, revision_exists
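
# Authenticate with the Hub using the Space's HF_TOKEN secret. Write access is
# requested, presumably so the app can push logs/votes to the repo named in
# LOG_REPO (the upload code is not shown in this excerpt).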
login(token=os.environ["HF_TOKEN"], | |
write_permission=True) | |
api = HfApi() | |
repo_name = os.environ["LOG_REPO"] | |
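
# Local staging directories for conversation logs and user votes.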
external_log_dir = "./logs" | |
LOGDIR = external_log_dir | |
VOTEDIR = "./votes" | |
if __name__ == "__main__": | |
import argparse | |
argparser = argparse.ArgumentParser() | |
argparser.add_argument("--server_name", default="0.0.0.0", type=str) | |
argparser.add_argument("--model_path", default="TIGER-Lab/MAmmoTH-VL2", type=str) | |
argparser.add_argument("--model-base", type=str, default=None) | |
argparser.add_argument("--num-gpus", type=int, default=1) | |
argparser.add_argument("--conv-mode", type=str, default=None) | |
argparser.add_argument("--temperature", type=float, default=0.7) | |
argparser.add_argument("--max-new-tokens", type=int, default=4096) | |
argparser.add_argument("--num_frames", type=int, default=32) | |
argparser.add_argument("--load-8bit", action="store_true") | |
argparser.add_argument("--load-4bit", action="store_true") | |
argparser.add_argument("--debug", action="store_true") | |
args = argparser.parse_args() | |
    model_path = args.model_path
    filt_invalid = "cut"
    model_name = get_model_name_from_path(args.model_path)
    tokenizer, model, image_processor, context_len = load_pretrained_model(
        args.model_path, args.model_base, model_name, args.load_8bit, args.load_4bit
    )
    model = model.to(torch.device("cuda"))
    chat_image_num = 0
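
    # `demo` is the Gradio UI assembled earlier in the full script; its
    # construction (chat components and event handlers) is omitted from
    # this excerpt.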
    demo.launch()