Spaces:

BenkHel
/

CumoThesis

Running

App Files Files Community

BenkHel committed on Jun 30

Commit

b0dba11

verified ·

1 Parent(s): 8df865e

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -74

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import subprocess
 import sys
 import os
@@ -10,7 +9,7 @@ import subprocess
 import spaces
 import cumo.serve.gradio_web_server as gws
-from transformers import AutoProcessor,AutoTokenizer, AutoImageProcessor
 import datetime
 import json
@@ -36,55 +35,6 @@ from cumo.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_ST
 from transformers import TextIteratorStreamer
 from threading import Thread
-# Execute the pip install command with additional options
-#subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'flash-attn', '--no-build-isolation', '-U']
-headers = {"User-Agent": "CuMo"}
-no_change_btn = gr.Button()
-enable_btn = gr.Button(interactive=True)
-disable_btn = gr.Button(interactive=False)
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_path = 'BenkHel/CumoThesis'
-conv_mode = 'mistral_instruct_system' # Diese Variable wird noch für die Konversationstemplates benötigt
-load_8bit = False
-load_4bit = False
-import sys
-import os
-import argparse
-import time
-import subprocess
-import spaces
-import cumo.serve.gradio_web_server as gws
-import datetime
-import json
-import gradio as gr
-import requests
-from PIL import Image
-from cumo.conversation import (default_conversation, conv_templates, SeparatorStyle)
-from cumo.constants import LOGDIR
-from cumo.utils import (build_logger, server_error_msg, violates_moderation, moderation_msg)
-import hashlib
-import torch
-import io
-from cumo.constants import WORKER_HEART_BEAT_INTERVAL
-from cumo.utils import (build_logger, server_error_msg,
-    pretty_print_semaphore)
-from cumo.model.builder import load_pretrained_model
-from cumo.mm_utils import process_images, load_image_from_base64, tokenizer_image_token
-from cumo.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
-from transformers import TextIteratorStreamer
-from threading import Thread
-# Execute the pip install command with additional options
-#subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'flash-attn', '--no-build-isolation', '-U']
 headers = {"User-Agent": "CuMo"}
 no_change_btn = gr.Button()
@@ -98,17 +48,21 @@ model_name = 'CuMo-mistral-7b'
 conv_mode = 'mistral_instruct_system'
 load_8bit = False
 load_4bit = False
-tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name, load_8bit, load_4bit, device=device, use_flash_attn=False)
 model.config.training = False
 def upvote_last_response(state):
     """Build the UI update tuple returned after an upvote.

     `state` is accepted but not read. The result is a 4-tuple: an empty
     string followed by three references to the module-level `disable_btn`
     (presumably clears the textbox and disables the three feedback
     buttons -- confirm against the event wiring).
     """
     return ("", disable_btn, disable_btn, disable_btn)
 def downvote_last_response(state):
     """Build the UI update tuple returned after a downvote.

     `state` is accepted but not read. The result is a 4-tuple: an empty
     string followed by three references to the module-level `disable_btn`
     (presumably clears the textbox and disables the three feedback
     buttons -- confirm against the event wiring).
     """
     return ("", disable_btn, disable_btn, disable_btn)
 def flag_last_response(state):
     """Build the UI update tuple returned after a response is flagged.

     `state` is accepted but not read. The result is a 4-tuple: an empty
     string followed by three references to the module-level `disable_btn`
     (presumably clears the textbox and disables the three feedback
     buttons -- confirm against the event wiring).
     """
     return ("", disable_btn, disable_btn, disable_btn)
@@ -121,15 +75,12 @@ def add_text(state, imagebox, textbox, image_process_mode):
         state = conv_templates[conv_mode].copy()
     if imagebox is not None:
-        textbox = DEFAULT_IMAGE_TOKEN + '\n' + textbox
         image = Image.open(imagebox).convert('RGB')
     if imagebox is not None:
         textbox = (textbox, image, image_process_mode)
     state.append_message(state.roles[0], textbox)
     state.append_message(state.roles[1], None)
     yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
 def delete_text(state, image_process_mode):
@@ -149,9 +100,8 @@ def regenerate(state, image_process_mode):
 @spaces.GPU
 def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens):
-    prompt = state.get_prompt()
     images = state.get_images(return_pil=True)
-    #prompt, image_args = process_image(prompt, images)
     ori_prompt = prompt
     num_image_tokens = 0
@@ -160,8 +110,6 @@ def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, m
         if len(images) > 0:
             if len(images) != prompt.count(DEFAULT_IMAGE_TOKEN):
                 raise ValueError("Number of images does not match number of <image> tokens in prompt")
-            #images = [load_image_from_base64(image) for image in images]
             image_sizes = [image.size for image in images]
             images = process_images(images, image_processor, model.config)
@@ -174,7 +122,6 @@ def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, m
             if getattr(model.config, 'mm_use_im_start_end', False):
                 replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN
             prompt = prompt.replace(DEFAULT_IMAGE_TOKEN, replace_token)
             num_image_tokens = prompt.count(replace_token) * model.get_vision_tower().num_patches
         else:
             images = None
@@ -193,7 +140,6 @@ def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, m
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
     max_new_tokens = min(max_new_tokens, max_context_length - input_ids.shape[-1] - num_image_tokens)
     if max_new_tokens < 1:
         yield json.dumps({"text": ori_prompt + "Exceeds max token length. Please start a new conversation, thanks.", "error_code": 0}).encode() + b"\0"
         return
@@ -217,25 +163,29 @@ def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, m
             generated_text = generated_text[:-len(stop_str)]
         state.messages[-1][-1] = generated_text
         yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
     yield (state, state.to_gradio_chatbot(), "", None) + (enable_btn,) * 5
     torch.cuda.empty_cache()
 title_markdown = ("""
-# CuMo: Scaling Multimodal LLM with Co-Upcycled Mixture-of-Experts
-[[Project Page](https://chrisjuniorli.github.io/project/CuMo/)] [[Code](https://github.com/SHI-Labs/CuMo)] [[Model](https://huggingface.co/shi-labs/CuMo-mistral-7b)] | 📚 [[Arxiv](https://arxiv.org/pdf/2405.05949)]]
 """)
 tos_markdown = ("""
-### Terms of use
-By using this service, users are required to agree to the following terms:
-The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
-Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
 For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
 """)
 learn_more_markdown = ("""
 ### License
 The service is a research preview intended for non-commercial use only, subject to the. Please contact us if you find any potential violation.
@@ -247,7 +197,15 @@ block_css = """
 }
 """
-textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
 with gr.Blocks(title="CuMo", theme=gr.themes.Default(), css=block_css) as demo:
     state = gr.State()

 import subprocess
 import sys
 import os
 import spaces
 import cumo.serve.gradio_web_server as gws
+from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor
 import datetime
 import json
 from transformers import TextIteratorStreamer
 from threading import Thread
 headers = {"User-Agent": "CuMo"}
 no_change_btn = gr.Button()
 conv_mode = 'mistral_instruct_system'
 load_8bit = False
 load_4bit = False
+tokenizer, model, image_processor, context_len = load_pretrained_model(
+    model_path, model_base, model_name, load_8bit, load_4bit, device=device, use_flash_attn=False
+)
 model.config.training = False
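+# Fixed prompt: used in place of user-typed text for every image query (add_text and generate both read it).
+# NOTE(review): generate() sets prompt = FIXED_PROMPT, which contains no image token, so its check of len(images) against prompt.count(DEFAULT_IMAGE_TOKEN) looks like it will raise ValueError whenever an image is present -- confirm.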
+FIXED_PROMPT = "What material is this item and how to dispose of it?"
 def upvote_last_response(state):
     """Build the UI update tuple returned after an upvote.

     `state` is accepted but not read. The result is a 4-tuple: an empty
     string followed by three references to the module-level `disable_btn`
     (presumably clears the textbox and disables the three feedback
     buttons -- confirm against the event wiring).
     """
     return ("", disable_btn, disable_btn, disable_btn)
 def downvote_last_response(state):
     """Build the UI update tuple returned after a downvote.

     `state` is accepted but not read. The result is a 4-tuple: an empty
     string followed by three references to the module-level `disable_btn`
     (presumably clears the textbox and disables the three feedback
     buttons -- confirm against the event wiring).
     """
     return ("", disable_btn, disable_btn, disable_btn)
 def flag_last_response(state):
     """Build the UI update tuple returned after a response is flagged.

     `state` is accepted but not read. The result is a 4-tuple: an empty
     string followed by three references to the module-level `disable_btn`
     (presumably clears the textbox and disables the three feedback
     buttons -- confirm against the event wiring).
     """
     return ("", disable_btn, disable_btn, disable_btn)
         state = conv_templates[conv_mode].copy()
     if imagebox is not None:
+        textbox = DEFAULT_IMAGE_TOKEN + '\n' + FIXED_PROMPT
         image = Image.open(imagebox).convert('RGB')
     if imagebox is not None:
         textbox = (textbox, image, image_process_mode)
     state.append_message(state.roles[0], textbox)
     state.append_message(state.roles[1], None)
     yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
 def delete_text(state, image_process_mode):
 @spaces.GPU
 def generate(state, imagebox, textbox, image_process_mode, temperature, top_p, max_output_tokens):
+    prompt = FIXED_PROMPT  # <-- Hier fest!
     images = state.get_images(return_pil=True)
     ori_prompt = prompt
     num_image_tokens = 0
         if len(images) > 0:
             if len(images) != prompt.count(DEFAULT_IMAGE_TOKEN):
                 raise ValueError("Number of images does not match number of <image> tokens in prompt")
             image_sizes = [image.size for image in images]
             images = process_images(images, image_processor, model.config)
             if getattr(model.config, 'mm_use_im_start_end', False):
                 replace_token = DEFAULT_IM_START_TOKEN + replace_token + DEFAULT_IM_END_TOKEN
             prompt = prompt.replace(DEFAULT_IMAGE_TOKEN, replace_token)
             num_image_tokens = prompt.count(replace_token) * model.get_vision_tower().num_patches
         else:
             images = None
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15)
     max_new_tokens = min(max_new_tokens, max_context_length - input_ids.shape[-1] - num_image_tokens)
     if max_new_tokens < 1:
         yield json.dumps({"text": ori_prompt + "Exceeds max token length. Please start a new conversation, thanks.", "error_code": 0}).encode() + b"\0"
         return
             generated_text = generated_text[:-len(stop_str)]
         state.messages[-1][-1] = generated_text
         yield (state, state.to_gradio_chatbot(), "", None) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
     yield (state, state.to_gradio_chatbot(), "", None) + (enable_btn,) * 5
     torch.cuda.empty_cache()
 title_markdown = ("""
+# CuMo: Trained for waste management
 """)
 tos_markdown = ("""
+### Source and Terms of use
+This demo is based on the original CuMo project by SHI-Labs ([GitHub](https://github.com/SHI-Labs/CuMo)).
+If you use this service or build upon this work, please cite the original publication:
+Li, Jiachen and Wang, Xinyao and Zhu, Sijie and Kuo, Chia-wen and Xu, Lu and Chen, Fan and Jain, Jitesh and Shi, Humphrey and Wen, Longyin.
+CuMo: Scaling Multimodal LLM with Co-Upcycled Mixture-of-Experts. arXiv preprint, 2024.
+[[arXiv](https://arxiv.org/abs/2405.05949)]
+By using this service, users are required to agree to the following terms:
+The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
 For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
 """)
 learn_more_markdown = ("""
 ### License
 The service is a research preview intended for non-commercial use only, subject to the. Please contact us if you find any potential violation.
 }
 """
+textbox = gr.Textbox(
+    show_label=False,
+    placeholder="Prompt is fixed: What material is this item and how to dispose of it?",
+    container=False,
+    interactive=False
+)
 with gr.Blocks(title="CuMo", theme=gr.themes.Default(), css=block_css) as demo:
     state = gr.State()