new
- app.py +196 -6
- llava/__pycache__/__init__.cpython-310.pyc +0 -0
- llava/__pycache__/constants.cpython-310.pyc +0 -0
- llava/__pycache__/conversation.cpython-310.pyc +0 -0
- llava/__pycache__/mm_utils.cpython-310.pyc +0 -0
- llava/__pycache__/utils.cpython-310.pyc +0 -0
- llava/model/__pycache__/__init__.cpython-310.pyc +0 -0
- llava/model/__pycache__/builder.cpython-310.pyc +0 -0
- llava/model/__pycache__/llava_arch.cpython-310.pyc +0 -0
- llava/model/language_model/__pycache__/llava_llama.cpython-310.pyc +0 -0
- llava/model/language_model/__pycache__/llava_mistral.cpython-310.pyc +0 -0
- llava/model/language_model/__pycache__/llava_mpt.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/__pycache__/imagebind.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/__pycache__/open_clip_encoder.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/__pycache__/eva_vit.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/__init__.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/constants.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/eva_vit_model.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/factory.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_configs.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_model.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/loss.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/model.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/modified_resnet.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/openai.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/pretrained.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/rope.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/timm_model.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/tokenizer.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transform.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transformer.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/utils.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_encoder.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_processors.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/eva_clip/__pycache__/eva_vit.cpython-310.pyc +0 -0
- llava/model/multimodal_encoder/eva_clip/__pycache__/factory.cpython-310.pyc +0 -0
- llava/model/multimodal_projector/__pycache__/builder.cpython-310.pyc +0 -0
- llava/train/__pycache__/llava_trainer.cpython-310.pyc +0 -0
- llava/train/__pycache__/train.cpython-310.pyc +0 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -31,9 +31,7 @@ enable_btn = gr.Button(interactive=True)
 disable_btn = gr.Button(interactive=False)
 
 argparser = argparse.ArgumentParser()
-argparser.add_argument("--
-argparser.add_argument("--port", default="6324", type=str)
-argparser.add_argument("--model-path", default="umd-vt-nyu/clip-evaclip-und-gen-sft-3v", type=str)
+argparser.add_argument("--model-path", default="umd-vt-nyu/clip-evaclip-und-gen-sft", type=str)
 argparser.add_argument("--model-base", type=str, default=None)
 argparser.add_argument("--num-gpus", type=int, default=1)
 argparser.add_argument("--conv-mode", type=str, default="llama3")
@@ -49,7 +47,7 @@ model_path = args.model_path
 conv_mode = args.conv_mode
 filt_invalid="cut"
 model_name = get_model_name_from_path(args.model_path)
-model_name = 'clip-evaclip-und-gen-
+model_name = 'clip-evaclip-und-gen-sft'
 model_kwargs = {
     "use_cache": False,
     "trust_remote_code": True,
@@ -262,7 +260,7 @@ with gr.Blocks(title="llava", theme=gr.themes.Default(), css=block_css) as demo:
 upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
 downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
 flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
-
+stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=False)
 regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
 clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
 
@@ -327,4 +325,196 @@ with gr.Blocks(title="llava", theme=gr.themes.Default(), css=block_css) as demo:
 demo.queue(
     status_update_rate=10,
     api_open=False
-).launch()
+).launch()
+
+
+
+
+
+
+
+
+
+# import gradio as gr
+# import os
+# import torch
+# import argparse
+# from transformers import TextIteratorStreamer
+# from threading import Thread
+# from PIL import Image
+# from llava import conversation as conversation_lib
+# from llava.constants import *
+# from llava.conversation import conv_templates, SeparatorStyle
+# from llava.model.builder import load_pretrained_model
+# from llava.utils import disable_torch_init
+# from llava.mm_utils import tokenizer_image_token, get_model_name_from_path, process_images
+# from diffusers import DiffusionPipeline
+
+# # Define paths and configurations
+# # diffusion_path = "/export/jchen169/hub/models--BAAI--Emu2-Gen/snapshots/a41a2dcd777a68225dddc72c7213b064ee06f4a0"
+
+# argparser = argparse.ArgumentParser()
+# argparser.add_argument("--model-path", default="umd-vt-nyu/clip-evaclip-und-gen-sft-3v", type=str)
+# argparser.add_argument("--conv-mode", type=str, default="llama3")
+# argparser.add_argument("--temperature", type=float, default=0.2)
+# argparser.add_argument("--max-new-tokens", type=int, default=64)
+# argparser.add_argument("--num_frames", type=int, default=16)
+# argparser.add_argument("--load-8bit", action="store_true")
+# argparser.add_argument("--load-4bit", action="store_true")
+# argparser.add_argument("--debug", action="store_true")
+# args = argparser.parse_args()
+
+# # Load LLaVA model
+# disable_torch_init()
+# model_name = get_model_name_from_path(args.model_path)
+# tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, None, model_name)
+# our_chatbot = None
+
+# # Load Diffusion model for image generation
+# pipe = DiffusionPipeline.from_pretrained(
+#     'BAAI/Emu2-Gen',
+#     custom_pipeline="pipeline_llava_gen",
+#     torch_dtype=torch.bfloat16,
+#     use_safetensors=True,
+#     variant="bf16",
+#     multimodal_encoder=model,
+#     tokenizer=tokenizer,
+# )
+# pipe.vae.to("cuda:0")
+# pipe.unet.to("cuda:0")
+# pipe.safety_checker.to("cuda:0")
+
+# def upvote_last_response(state):
+#     return ("",) + (disable_btn,) * 3
+
+# def downvote_last_response(state):
+#     return ("",) + (disable_btn,) * 3
+
+# def flag_last_response(state):
+#     return ("",) + (disable_btn,) * 3
+
+# def clear_history():
+#     state = conv_templates[conv_mode].copy()
+#     return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
+
+# def add_text(state, imagebox, textbox, image_process_mode):
+#     if state is None:
+#         state = conv_templates[conv_mode].copy()
+
+#     if imagebox is not None:
+#         textbox = DEFAULT_IMAGE_TOKEN + '\n' + textbox
+#         image = Image.open(imagebox).convert('RGB')
+#     if imagebox is not None:
+#         textbox = (textbox, image, image_process_mode)
+
+#     state.append_message(state.roles[0], textbox)
+#     state.append_message(state.roles[1], None)
+
+#     yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+
+# def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens):
+#     prompt = state.get_prompt()
+#     images = state.get_images(return_pil=True)
+#     ori_prompt = prompt
+#     num_image_tokens = 0
+
+#     if images is not None and len(images) > 0:
+#         if len(images) > 0:
+#             if len(images) != prompt.count(DEFAULT_IMAGE_TOKEN):
+#                 raise ValueError("Number of images does not match number of <image> tokens in prompt")
+
+#             image_sizes = [image.size for image in images]
+#             images = process_images(images, image_processor, model.config)
+#             if type(images) is list:
+#                 images = [image.to(model.device, dtype=torch.float16) for image in images]
+#             else:
+#                 images = images.to(model.device, dtype=torch.float16)
+#         else:
+#             images = None
+#             image_sizes = None
+#         image_args = {"images": images, "image_sizes": image_sizes}
+#     else:
+#         images = None
+#         image_args = {}
+
+#     max_context_length = getattr(model.config, 'max_position_embeddings', 2048)
+#     max_new_tokens = 512
+#     do_sample = True if temperature > 0.001 else False
+#     stop_str = state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2
+
+#     input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_IDX, return_tensors='pt').unsqueeze(0).to(model.device)
+#     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
+#     max_new_tokens = min(max_new_tokens, max_context_length - input_ids.shape[-1] - num_image_tokens)
+
+#     if max_new_tokens < 1:
+#         return
+
+#     thread = Thread(target=model.generate, kwargs=dict(
+#         inputs=input_ids,
+#         do_sample=do_sample,
+#         temperature=temperature,
+#         top_p=top_p,
+#         max_new_tokens=max_new_tokens,
+#         streamer=streamer,
+#         use_cache=True,
+#         pad_token_id=tokenizer.eos_token_id,
+#         **image_args
+#     ))
+#     thread.start()
+#     generated_text = ''
+#     for new_text in streamer:
+#         generated_text += new_text
+#         if generated_text.endswith(stop_str):
+#             generated_text = generated_text[:-len(stop_str)]
+#         state.messages[-1][-1] = generated_text
+#         yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+
+#     yield (state, state.to_gradio_chatbot(), "", None) + (enable_btn,) * 5
+#     torch.cuda.empty_cache()
+
+# def add_template(prompt):
+#     conv = conv_templates['llama3'].copy()
+#     conv.append_message(conv.roles[0], prompt[0])
+#     conv.append_message(conv.roles[1], None)
+#     prompt = conv.get_prompt()
+#     return [prompt]
+
+
+# def generate_image(prompt):
+#     prompt = add_template(prompt)
+#     gen_img = pipe(prompt, guidance_scale=3.0)
+#     return gen_img.image
+
+# # Interface setup
+# with gr.Blocks(title="LLaVA Chatbot with Image Generation") as demo:
+#     state = gr.State()
+#     gr.Markdown("# LLaVA Chatbot with Image Generation")
+
+#     with gr.Row():
+#         with gr.Column(scale=3):
+#             imagebox = gr.Image(label="Input Image", type="filepath")
+#             image_process_mode = gr.Radio(
+#                 ["Crop", "Resize", "Pad", "Default"],
+#                 value="Default",
+#                 label="Preprocess for non-square image", visible=False)
+#             temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature")
+#             top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P")
+#             max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens")
+#         with gr.Column(scale=8):
+#             chatbot = gr.Chatbot(label="LLaVA Chatbot", height=650, layout="panel")
+#             textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
+#             submit_btn = gr.Button(value="Send", variant="primary")
+
+#     with gr.Row() as button_row:
+#         clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
+
+#     # Define actions
+#     submit_btn.click(
+#         lambda state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens: (
+#             generate_image([textbox]) if "generate image" in textbox.lower() else add_text(
+#                 state, imagebox, textbox, image_process_mode)),
+#         [state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens],
+#         [state, chatbot, textbox, imagebox]
+#     )
+
+# demo.queue(status_update_rate=10, api_open=False).launch()
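Note on the commented-out rewrite appended to app.py: its generate() streams partial output by running model.generate on a worker thread while the foreground loop reads from a transformers TextIteratorStreamer. A minimal, self-contained sketch of that pattern follows; the gpt2 checkpoint is a stand-in for illustration, not the model shipped in this commit.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer("Describe the image:", return_tensors="pt").input_ids
# The streamer yields decoded text chunks; timeout guards against a stalled generation.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)

# generate() blocks until done, so it runs on its own thread; consuming the
# streamer in the foreground is what lets a Gradio callback yield incremental
# chat updates the way the commented-out generate() does.
thread = Thread(target=model.generate, kwargs=dict(
    inputs=input_ids,
    max_new_tokens=32,
    do_sample=False,
    streamer=streamer,
    pad_token_id=tokenizer.eos_token_id,
))
thread.start()

generated_text = ""
for new_text in streamer:  # arrives piece by piece as tokens are produced
    generated_text += new_text
print(generated_text)
thread.join()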
llava/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/llava/__pycache__/__init__.cpython-310.pyc and b/llava/__pycache__/__init__.cpython-310.pyc differ

llava/__pycache__/constants.cpython-310.pyc
CHANGED
Binary files a/llava/__pycache__/constants.cpython-310.pyc and b/llava/__pycache__/constants.cpython-310.pyc differ

llava/__pycache__/conversation.cpython-310.pyc
CHANGED
Binary files a/llava/__pycache__/conversation.cpython-310.pyc and b/llava/__pycache__/conversation.cpython-310.pyc differ

llava/__pycache__/mm_utils.cpython-310.pyc
CHANGED
Binary files a/llava/__pycache__/mm_utils.cpython-310.pyc and b/llava/__pycache__/mm_utils.cpython-310.pyc differ

llava/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/llava/__pycache__/utils.cpython-310.pyc and b/llava/__pycache__/utils.cpython-310.pyc differ

llava/model/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/llava/model/__pycache__/__init__.cpython-310.pyc and b/llava/model/__pycache__/__init__.cpython-310.pyc differ

llava/model/__pycache__/builder.cpython-310.pyc
CHANGED
Binary files a/llava/model/__pycache__/builder.cpython-310.pyc and b/llava/model/__pycache__/builder.cpython-310.pyc differ

llava/model/__pycache__/llava_arch.cpython-310.pyc
CHANGED
Binary files a/llava/model/__pycache__/llava_arch.cpython-310.pyc and b/llava/model/__pycache__/llava_arch.cpython-310.pyc differ

llava/model/language_model/__pycache__/llava_llama.cpython-310.pyc
CHANGED
Binary files a/llava/model/language_model/__pycache__/llava_llama.cpython-310.pyc and b/llava/model/language_model/__pycache__/llava_llama.cpython-310.pyc differ

llava/model/language_model/__pycache__/llava_mistral.cpython-310.pyc
CHANGED
Binary files a/llava/model/language_model/__pycache__/llava_mistral.cpython-310.pyc and b/llava/model/language_model/__pycache__/llava_mistral.cpython-310.pyc differ

llava/model/language_model/__pycache__/llava_mpt.cpython-310.pyc
CHANGED
Binary files a/llava/model/language_model/__pycache__/llava_mpt.cpython-310.pyc and b/llava/model/language_model/__pycache__/llava_mpt.cpython-310.pyc differ

llava/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc differ

llava/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc differ

llava/model/multimodal_encoder/__pycache__/imagebind.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/imagebind.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/imagebind.cpython-310.pyc differ

llava/model/multimodal_encoder/__pycache__/open_clip_encoder.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/open_clip_encoder.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/open_clip_encoder.cpython-310.pyc differ

llava/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/__pycache__/eva_vit.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/__pycache__/eva_vit.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/__pycache__/eva_vit.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/__init__.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/__init__.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/constants.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/constants.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/constants.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/eva_vit_model.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/eva_vit_model.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/eva_vit_model.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/factory.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/factory.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/factory.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_configs.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_configs.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_configs.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_model.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_model.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_model.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/loss.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/loss.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/loss.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/model.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/model.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/model.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/modified_resnet.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/modified_resnet.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/modified_resnet.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/openai.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/openai.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/openai.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/pretrained.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/pretrained.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/pretrained.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/rope.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/rope.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/rope.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/timm_model.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/timm_model.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/timm_model.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/tokenizer.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/tokenizer.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/tokenizer.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transform.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transform.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transform.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transformer.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transformer.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transformer.cpython-310.pyc differ

llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/utils.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/utils.cpython-310.pyc differ

llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_encoder.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_encoder.cpython-310.pyc and b/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_encoder.cpython-310.pyc differ

llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_processors.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_processors.cpython-310.pyc and b/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_processors.cpython-310.pyc differ

llava/model/multimodal_encoder/eva_clip/__pycache__/eva_vit.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_vit.cpython-310.pyc and b/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_vit.cpython-310.pyc differ

llava/model/multimodal_encoder/eva_clip/__pycache__/factory.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_encoder/eva_clip/__pycache__/factory.cpython-310.pyc and b/llava/model/multimodal_encoder/eva_clip/__pycache__/factory.cpython-310.pyc differ

llava/model/multimodal_projector/__pycache__/builder.cpython-310.pyc
CHANGED
Binary files a/llava/model/multimodal_projector/__pycache__/builder.cpython-310.pyc and b/llava/model/multimodal_projector/__pycache__/builder.cpython-310.pyc differ

llava/train/__pycache__/llava_trainer.cpython-310.pyc
CHANGED
Binary files a/llava/train/__pycache__/llava_trainer.cpython-310.pyc and b/llava/train/__pycache__/llava_trainer.cpython-310.pyc differ

llava/train/__pycache__/train.cpython-310.pyc
CHANGED
Binary files a/llava/train/__pycache__/train.cpython-310.pyc and b/llava/train/__pycache__/train.cpython-310.pyc differ

requirements.txt
CHANGED
@@ -25,4 +25,5 @@ fvcore
 fastapi==0.112.2
 ftfy
 xformers
-torchaudio
+torchaudio
+diffusers
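Note on the requirements.txt change: diffusers backs the commented-out image-generation path in app.py, which loads BAAI/Emu2-Gen through DiffusionPipeline with a custom pipeline. A generic sketch of the standard diffusers load-and-sample pattern follows; the checkpoint and prompt are arbitrary public examples, not from this commit.

import torch

from diffusers import DiffusionPipeline

# from_pretrained resolves the checkpoint's pipeline class automatically;
# fp16 weights keep memory modest on a single GPU.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # example checkpoint (assumption)
    torch_dtype=torch.float16,
    use_safetensors=True,
)
pipe = pipe.to("cuda")

# guidance_scale=3.0 mirrors the value generate_image() passes in the
# commented-out app.py code.
image = pipe("a photo of an astronaut riding a horse", guidance_scale=3.0).images[0]
image.save("out.png")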