jiuhai committed
Commit 54c78cf · 1 Parent(s): 79d5afe
Files changed (43)
  1. app.py +196 -6
  2. llava/__pycache__/__init__.cpython-310.pyc +0 -0
  3. llava/__pycache__/constants.cpython-310.pyc +0 -0
  4. llava/__pycache__/conversation.cpython-310.pyc +0 -0
  5. llava/__pycache__/mm_utils.cpython-310.pyc +0 -0
  6. llava/__pycache__/utils.cpython-310.pyc +0 -0
  7. llava/model/__pycache__/__init__.cpython-310.pyc +0 -0
  8. llava/model/__pycache__/builder.cpython-310.pyc +0 -0
  9. llava/model/__pycache__/llava_arch.cpython-310.pyc +0 -0
  10. llava/model/language_model/__pycache__/llava_llama.cpython-310.pyc +0 -0
  11. llava/model/language_model/__pycache__/llava_mistral.cpython-310.pyc +0 -0
  12. llava/model/language_model/__pycache__/llava_mpt.cpython-310.pyc +0 -0
  13. llava/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc +0 -0
  14. llava/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc +0 -0
  15. llava/model/multimodal_encoder/__pycache__/imagebind.cpython-310.pyc +0 -0
  16. llava/model/multimodal_encoder/__pycache__/open_clip_encoder.cpython-310.pyc +0 -0
  17. llava/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc +0 -0
  18. llava/model/multimodal_encoder/dev_eva_clip/__pycache__/eva_vit.cpython-310.pyc +0 -0
  19. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/__init__.cpython-310.pyc +0 -0
  20. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/constants.cpython-310.pyc +0 -0
  21. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/eva_vit_model.cpython-310.pyc +0 -0
  22. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/factory.cpython-310.pyc +0 -0
  23. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_configs.cpython-310.pyc +0 -0
  24. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_model.cpython-310.pyc +0 -0
  25. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/loss.cpython-310.pyc +0 -0
  26. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/model.cpython-310.pyc +0 -0
  27. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/modified_resnet.cpython-310.pyc +0 -0
  28. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/openai.cpython-310.pyc +0 -0
  29. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/pretrained.cpython-310.pyc +0 -0
  30. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/rope.cpython-310.pyc +0 -0
  31. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/timm_model.cpython-310.pyc +0 -0
  32. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/tokenizer.cpython-310.pyc +0 -0
  33. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transform.cpython-310.pyc +0 -0
  34. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transformer.cpython-310.pyc +0 -0
  35. llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/utils.cpython-310.pyc +0 -0
  36. llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_encoder.cpython-310.pyc +0 -0
  37. llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_processors.cpython-310.pyc +0 -0
  38. llava/model/multimodal_encoder/eva_clip/__pycache__/eva_vit.cpython-310.pyc +0 -0
  39. llava/model/multimodal_encoder/eva_clip/__pycache__/factory.cpython-310.pyc +0 -0
  40. llava/model/multimodal_projector/__pycache__/builder.cpython-310.pyc +0 -0
  41. llava/train/__pycache__/llava_trainer.cpython-310.pyc +0 -0
  42. llava/train/__pycache__/train.cpython-310.pyc +0 -0
  43. requirements.txt +2 -1
app.py CHANGED
@@ -31,9 +31,7 @@ enable_btn = gr.Button(interactive=True)
 disable_btn = gr.Button(interactive=False)
 
 argparser = argparse.ArgumentParser()
-argparser.add_argument("--server_name", default="0.0.0.0", type=str)
-argparser.add_argument("--port", default="6324", type=str)
-argparser.add_argument("--model-path", default="umd-vt-nyu/clip-evaclip-und-gen-sft-3v", type=str)
+argparser.add_argument("--model-path", default="umd-vt-nyu/clip-evaclip-und-gen-sft", type=str)
 argparser.add_argument("--model-base", type=str, default=None)
 argparser.add_argument("--num-gpus", type=int, default=1)
 argparser.add_argument("--conv-mode", type=str, default="llama3")
@@ -49,7 +47,7 @@ model_path = args.model_path
 conv_mode = args.conv_mode
 filt_invalid="cut"
 model_name = get_model_name_from_path(args.model_path)
-model_name = 'clip-evaclip-und-gen-pretrain'
+model_name = 'clip-evaclip-und-gen-sft'
 model_kwargs = {
     "use_cache": False,
     "trust_remote_code": True,
@@ -262,7 +260,7 @@ with gr.Blocks(title="llava", theme=gr.themes.Default(), css=block_css) as demo:
 upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
 downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
 flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
-#stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=False)
+stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=False)
 regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
 clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
 
@@ -327,4 +325,196 @@ with gr.Blocks(title="llava", theme=gr.themes.Default(), css=block_css) as demo:
 demo.queue(
     status_update_rate=10,
     api_open=False
-).launch()
+).launch()
+
+
+
+
+
+
+
+
+
+# import gradio as gr
+# import os
+# import torch
+# import argparse
+# from transformers import TextIteratorStreamer
+# from threading import Thread
+# from PIL import Image
+# from llava import conversation as conversation_lib
+# from llava.constants import *
+# from llava.conversation import conv_templates, SeparatorStyle
+# from llava.model.builder import load_pretrained_model
+# from llava.utils import disable_torch_init
+# from llava.mm_utils import tokenizer_image_token, get_model_name_from_path, process_images
+# from diffusers import DiffusionPipeline
+
+# # Define paths and configurations
+# # diffusion_path = "/export/jchen169/hub/models--BAAI--Emu2-Gen/snapshots/a41a2dcd777a68225dddc72c7213b064ee06f4a0"
+
+# argparser = argparse.ArgumentParser()
+# argparser.add_argument("--model-path", default="umd-vt-nyu/clip-evaclip-und-gen-sft-3v", type=str)
+# argparser.add_argument("--conv-mode", type=str, default="llama3")
+# argparser.add_argument("--temperature", type=float, default=0.2)
+# argparser.add_argument("--max-new-tokens", type=int, default=64)
+# argparser.add_argument("--num_frames", type=int, default=16)
+# argparser.add_argument("--load-8bit", action="store_true")
+# argparser.add_argument("--load-4bit", action="store_true")
+# argparser.add_argument("--debug", action="store_true")
+# args = argparser.parse_args()
+
+# # Load LLaVA model
+# disable_torch_init()
+# model_name = get_model_name_from_path(args.model_path)
+# tokenizer, model, image_processor, context_len = load_pretrained_model(args.model_path, None, model_name)
+# our_chatbot = None
+
+# # Load Diffusion model for image generation
+# pipe = DiffusionPipeline.from_pretrained(
+#     'BAAI/Emu2-Gen',
+#     custom_pipeline="pipeline_llava_gen",
+#     torch_dtype=torch.bfloat16,
+#     use_safetensors=True,
+#     variant="bf16",
+#     multimodal_encoder=model,
+#     tokenizer=tokenizer,
+# )
+# pipe.vae.to("cuda:0")
+# pipe.unet.to("cuda:0")
+# pipe.safety_checker.to("cuda:0")
+
+# def upvote_last_response(state):
+#     return ("",) + (disable_btn,) * 3
+
+# def downvote_last_response(state):
+#     return ("",) + (disable_btn,) * 3
+
+# def flag_last_response(state):
+#     return ("",) + (disable_btn,) * 3
+
+# def clear_history():
+#     state = conv_templates[conv_mode].copy()
+#     return (state, state.to_gradio_chatbot(), "", None) + (disable_btn,) * 5
+
+# def add_text(state, imagebox, textbox, image_process_mode):
+#     if state is None:
+#         state = conv_templates[conv_mode].copy()
+
+#     if imagebox is not None:
+#         textbox = DEFAULT_IMAGE_TOKEN + '\n' + textbox
+#         image = Image.open(imagebox).convert('RGB')
+#     if imagebox is not None:
+#         textbox = (textbox, image, image_process_mode)
+
+#     state.append_message(state.roles[0], textbox)
+#     state.append_message(state.roles[1], None)
+
+#     yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+
+# def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens):
+#     prompt = state.get_prompt()
+#     images = state.get_images(return_pil=True)
+#     ori_prompt = prompt
+#     num_image_tokens = 0
+
+#     if images is not None and len(images) > 0:
+#         if len(images) > 0:
+#             if len(images) != prompt.count(DEFAULT_IMAGE_TOKEN):
+#                 raise ValueError("Number of images does not match number of <image> tokens in prompt")
+
+#             image_sizes = [image.size for image in images]
+#             images = process_images(images, image_processor, model.config)
+#             if type(images) is list:
+#                 images = [image.to(model.device, dtype=torch.float16) for image in images]
+#             else:
+#                 images = images.to(model.device, dtype=torch.float16)
+#         else:
+#             images = None
+#             image_sizes = None
+#         image_args = {"images": images, "image_sizes": image_sizes}
+#     else:
+#         images = None
+#         image_args = {}
+
+#     max_context_length = getattr(model.config, 'max_position_embeddings', 2048)
+#     max_new_tokens = 512
+#     do_sample = True if temperature > 0.001 else False
+#     stop_str = state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2
+
+#     input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_IDX, return_tensors='pt').unsqueeze(0).to(model.device)
+#     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
+#     max_new_tokens = min(max_new_tokens, max_context_length - input_ids.shape[-1] - num_image_tokens)
+
+#     if max_new_tokens < 1:
+#         return
+
+#     thread = Thread(target=model.generate, kwargs=dict(
+#         inputs=input_ids,
+#         do_sample=do_sample,
+#         temperature=temperature,
+#         top_p=top_p,
+#         max_new_tokens=max_new_tokens,
+#         streamer=streamer,
+#         use_cache=True,
+#         pad_token_id=tokenizer.eos_token_id,
+#         **image_args
+#     ))
+#     thread.start()
+#     generated_text = ''
+#     for new_text in streamer:
+#         generated_text += new_text
+#         if generated_text.endswith(stop_str):
+#             generated_text = generated_text[:-len(stop_str)]
+#         state.messages[-1][-1] = generated_text
+#         yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
+
+#     yield (state, state.to_gradio_chatbot(), "", None) + (enable_btn,) * 5
+#     torch.cuda.empty_cache()
+
+# def add_template(prompt):
+#     conv = conv_templates['llama3'].copy()
+#     conv.append_message(conv.roles[0], prompt[0])
+#     conv.append_message(conv.roles[1], None)
+#     prompt = conv.get_prompt()
+#     return [prompt]
+
+
+# def generate_image(prompt):
+#     prompt = add_template(prompt)
+#     gen_img = pipe(prompt, guidance_scale=3.0)
+#     return gen_img.image
+
+# # Interface setup
+# with gr.Blocks(title="LLaVA Chatbot with Image Generation") as demo:
+#     state = gr.State()
+#     gr.Markdown("# LLaVA Chatbot with Image Generation")
+
+#     with gr.Row():
+#         with gr.Column(scale=3):
+#             imagebox = gr.Image(label="Input Image", type="filepath")
+#             image_process_mode = gr.Radio(
+#                 ["Crop", "Resize", "Pad", "Default"],
+#                 value="Default",
+#                 label="Preprocess for non-square image", visible=False)
+#             temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature")
+#             top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P")
+#             max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens")
+#         with gr.Column(scale=8):
+#             chatbot = gr.Chatbot(label="LLaVA Chatbot", height=650, layout="panel")
+#             textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
+#             submit_btn = gr.Button(value="Send", variant="primary")
+
+#     with gr.Row() as button_row:
+#         clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
+
+#     # Define actions
+#     submit_btn.click(
+#         lambda state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens: (
+#             generate_image([textbox]) if "generate image" in textbox.lower() else add_text(
+#                 state, imagebox, textbox, image_process_mode)),
+#         [state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens],
+#         [state, chatbot, textbox, imagebox]
+#     )
+
+# demo.queue(status_update_rate=10, api_open=False).launch()
llava/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/llava/__pycache__/__init__.cpython-310.pyc and b/llava/__pycache__/__init__.cpython-310.pyc differ
 
llava/__pycache__/constants.cpython-310.pyc CHANGED
Binary files a/llava/__pycache__/constants.cpython-310.pyc and b/llava/__pycache__/constants.cpython-310.pyc differ
 
llava/__pycache__/conversation.cpython-310.pyc CHANGED
Binary files a/llava/__pycache__/conversation.cpython-310.pyc and b/llava/__pycache__/conversation.cpython-310.pyc differ
 
llava/__pycache__/mm_utils.cpython-310.pyc CHANGED
Binary files a/llava/__pycache__/mm_utils.cpython-310.pyc and b/llava/__pycache__/mm_utils.cpython-310.pyc differ
 
llava/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/llava/__pycache__/utils.cpython-310.pyc and b/llava/__pycache__/utils.cpython-310.pyc differ
 
llava/model/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/llava/model/__pycache__/__init__.cpython-310.pyc and b/llava/model/__pycache__/__init__.cpython-310.pyc differ
 
llava/model/__pycache__/builder.cpython-310.pyc CHANGED
Binary files a/llava/model/__pycache__/builder.cpython-310.pyc and b/llava/model/__pycache__/builder.cpython-310.pyc differ
 
llava/model/__pycache__/llava_arch.cpython-310.pyc CHANGED
Binary files a/llava/model/__pycache__/llava_arch.cpython-310.pyc and b/llava/model/__pycache__/llava_arch.cpython-310.pyc differ
 
llava/model/language_model/__pycache__/llava_llama.cpython-310.pyc CHANGED
Binary files a/llava/model/language_model/__pycache__/llava_llama.cpython-310.pyc and b/llava/model/language_model/__pycache__/llava_llama.cpython-310.pyc differ
 
llava/model/language_model/__pycache__/llava_mistral.cpython-310.pyc CHANGED
Binary files a/llava/model/language_model/__pycache__/llava_mistral.cpython-310.pyc and b/llava/model/language_model/__pycache__/llava_mistral.cpython-310.pyc differ
 
llava/model/language_model/__pycache__/llava_mpt.cpython-310.pyc CHANGED
Binary files a/llava/model/language_model/__pycache__/llava_mpt.cpython-310.pyc and b/llava/model/language_model/__pycache__/llava_mpt.cpython-310.pyc differ
 
llava/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/builder.cpython-310.pyc differ
 
llava/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/clip_encoder.cpython-310.pyc differ
 
llava/model/multimodal_encoder/__pycache__/imagebind.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/imagebind.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/imagebind.cpython-310.pyc differ
 
llava/model/multimodal_encoder/__pycache__/open_clip_encoder.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/open_clip_encoder.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/open_clip_encoder.cpython-310.pyc differ
 
llava/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc and b/llava/model/multimodal_encoder/__pycache__/siglip_encoder.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/__pycache__/eva_vit.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/__pycache__/eva_vit.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/__pycache__/eva_vit.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/__init__.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/__init__.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/constants.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/constants.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/constants.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/eva_vit_model.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/eva_vit_model.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/eva_vit_model.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/factory.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/factory.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/factory.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_configs.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_configs.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_configs.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_model.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_model.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/hf_model.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/loss.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/loss.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/loss.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/model.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/model.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/model.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/modified_resnet.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/modified_resnet.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/modified_resnet.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/openai.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/openai.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/openai.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/pretrained.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/pretrained.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/pretrained.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/rope.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/rope.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/rope.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/timm_model.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/timm_model.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/timm_model.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/tokenizer.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/tokenizer.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/tokenizer.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transform.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transform.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transform.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transformer.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transformer.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/transformer.cpython-310.pyc differ
 
llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/utils.cpython-310.pyc and b/llava/model/multimodal_encoder/dev_eva_clip/eva_clip/__pycache__/utils.cpython-310.pyc differ
 
llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_encoder.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_encoder.cpython-310.pyc and b/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_encoder.cpython-310.pyc differ
 
llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_processors.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_processors.cpython-310.pyc and b/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_clip_processors.cpython-310.pyc differ
 
llava/model/multimodal_encoder/eva_clip/__pycache__/eva_vit.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_vit.cpython-310.pyc and b/llava/model/multimodal_encoder/eva_clip/__pycache__/eva_vit.cpython-310.pyc differ
 
llava/model/multimodal_encoder/eva_clip/__pycache__/factory.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_encoder/eva_clip/__pycache__/factory.cpython-310.pyc and b/llava/model/multimodal_encoder/eva_clip/__pycache__/factory.cpython-310.pyc differ
 
llava/model/multimodal_projector/__pycache__/builder.cpython-310.pyc CHANGED
Binary files a/llava/model/multimodal_projector/__pycache__/builder.cpython-310.pyc and b/llava/model/multimodal_projector/__pycache__/builder.cpython-310.pyc differ
 
llava/train/__pycache__/llava_trainer.cpython-310.pyc CHANGED
Binary files a/llava/train/__pycache__/llava_trainer.cpython-310.pyc and b/llava/train/__pycache__/llava_trainer.cpython-310.pyc differ
 
llava/train/__pycache__/train.cpython-310.pyc CHANGED
Binary files a/llava/train/__pycache__/train.cpython-310.pyc and b/llava/train/__pycache__/train.cpython-310.pyc differ
 
requirements.txt CHANGED
@@ -25,4 +25,5 @@ fvcore
 fastapi==0.112.2
 ftfy
 xformers
-torchaudio
+torchaudio
+diffusers