Spaces:

ginigen
/

Ovis2-8B

Running

App Files Files Community

ginipick committed on Feb 19

Commit

2a6f5e7

verified ·

1 Parent(s): 86c8d9d

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -106

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import subprocess
 subprocess.run('pip install flash-attn==2.7.0.post2 --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 import os
 import re
 import logging
@@ -11,23 +12,21 @@ import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, TextIteratorStreamer
-# 모델 및 토크나이저 로딩
 model_name = 'AIDC-AI/Ovis2-8B'
 use_thread = False
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    multimodal_max_length=8192,
-    trust_remote_code=True
-).to(device='cuda')
 text_tokenizer = model.get_text_tokenizer()
 visual_tokenizer = model.get_visual_tokenizer()
 streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)
 image_placeholder = '<image>'
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -47,10 +46,9 @@ def initialize_gen_kwargs():
 def submit_chat(chatbot, text_input):
     """Queue the user's prompt in the chat history before generation runs.
     The prompt is appended to ``chatbot`` in place as a ``(prompt, '')``
     pair, i.e. with an empty response placeholder. Returns the updated
     history together with an empty string; the event wiring in this file
     routes the two values to ``[chatbot, text_input]``, so the empty
     string is what ends up in the prompt textbox.
     """
     # Placeholder response for the newly appended turn.
     response = ''
     chatbot.append((text_input, response))
-    return chatbot, ''
-@gradio.routes.no_temp_folder()
-@gradio.gpu()
 def ovis_chat(chatbot: List[List[str]], image_input: Any):
     conversations, model_inputs = prepare_inputs(chatbot, image_input)
     gen_kwargs = initialize_gen_kwargs()
@@ -75,8 +73,14 @@ def ovis_chat(chatbot: List[List[str]], image_input: Any):
     log_conversation(chatbot)
 def prepare_inputs(chatbot: List[List[str]], image_input: Any):
-    conversations = []
     for query, response in chatbot[:-1]:
         conversations.extend([
             {"from": "human", "value": query},
@@ -87,7 +91,6 @@ def prepare_inputs(chatbot: List[List[str]], image_input: Any):
     conversations.append({"from": "human", "value": last_query})
     if image_input is not None:
-        # 이미지가 포함되면 첫 번째 human 메시지에 이미지 태그 추가
         for conv in conversations:
             if conv["from"] == "human":
                 conv["value"] = f'{image_placeholder}\n{conv["value"]}'
@@ -114,7 +117,6 @@ def log_conversation(chatbot):
 def clear_chat():
     """Return the reset values used by the Clear button.
     The three values are, in order, an empty list, ``None`` and an empty
     string; the Clear button's click handler in this file sends them to
     ``[chatbot, image_input, text_input]`` respectively.
     """
     return [], None, ""
-# 로고 SVG 로드 및 스타일 수정
 with open(f"{cur_dir}/resource/logo.svg", "r", encoding="utf-8") as svg_file:
     svg_content = svg_file.read()
 font_size = "2.5em"
@@ -124,14 +126,7 @@ html = f"""
     <span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
     <span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
 </p>
-<center>
-    <font size=3>
-        <b>Ovis</b> has been open-sourced on
-        <a href='https://huggingface.co/{model_name}'>😊 Huggingface</a> and
-        <a href='https://github.com/AIDC-AI/Ovis'>🌟 GitHub</a>.
-        If you find Ovis useful, a like❤️ or a star🌟 would be appreciated.
-    </font>
-</center>
 """
 latex_delimiters_set = [{
@@ -164,75 +159,18 @@ latex_delimiters_set = [{
         "display": True
     }]
-text_input = gr.Textbox(label="Prompt", placeholder="Enter your text here...", lines=1, container=False)
-# 커스텀 CSS (배경 그라데이션, 반투명 컨테이너, 버튼 애니메이션 등)
-custom_css = """
-body {
-    background: linear-gradient(135deg, #667eea, #764ba2);
-    font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
-    color: #333;
-    margin: 0;
-    padding: 0;
-}
-.gradio-container {
-    background: rgba(255, 255, 255, 0.95);
-    border-radius: 15px;
-    padding: 30px 40px;
-    box-shadow: 0 8px 30px rgba(0, 0, 0, 0.3);
-    margin: 40px auto;
-    max-width: 1200px;
-}
-.gradio-container h1 {
-    color: #333;
-    text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.2);
-}
-.fillable {
-    width: 95% !important;
-    max-width: unset !important;
-}
-#examples_container {
-    margin: auto;
-    width: 90%;
-}
-#examples_row {
-    justify-content: center;
-}
-.sidebar {
-    background: rgba(255, 255, 255, 0.98);
-    border-radius: 10px;
-    padding: 20px;
-    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
-}
-button, .btn {
-    background: linear-gradient(90deg, #ff8a00, #e52e71);
-    border: none;
-    color: #fff;
-    padding: 12px 24px;
-    text-transform: uppercase;
-    font-weight: bold;
-    letter-spacing: 1px;
-    border-radius: 5px;
-    cursor: pointer;
-    transition: transform 0.2s ease-in-out;
-}
-button:hover, .btn:hover {
-    transform: scale(1.05);
-}
-"""
-with gr.Blocks(css=custom_css, title=model_name.split('/')[-1]) as demo:
     gr.HTML(html)
     with gr.Row():
         with gr.Column(scale=3):
-            image_input = gr.Image(label="Image", height=350, type="pil")
             gr.Examples(
                 examples=[
-                    [f"{cur_dir}/examples/ovis2_math0.jpg", "Each face of the polyhedron shown is either a triangle or a square. Each square borders 4 triangles, and each triangle borders 3 squares. The polyhedron has 6 squares. How many triangles does it have?\n\nProvide a step-by-step solution to the problem, and conclude with 'the answer is' followed by the final solution."],
-                    [f"{cur_dir}/examples/ovis2_math1.jpg", "A large square touches another two squares, as shown in the picture. The numbers inside the smaller squares indicate their areas. What is the area of the largest square?\n\nProvide a step-by-step solution to the problem, and conclude with 'the answer is' followed by the final solution."],
-                    [f"{cur_dir}/examples/ovis2_figure0.png", "Explain this model."],
-                    [f"{cur_dir}/examples/ovis2_figure1.png", "Organize the notes about GRPO in the figure."],
-                    [f"{cur_dir}/examples/ovis2_multi0.jpg", "Posso avere un frappuccino e un caffè americano di taglia M? Quanto costa in totale?"],
                 ],
                 inputs=[image_input, text_input]
             )
@@ -240,27 +178,11 @@ with gr.Blocks(css=custom_css, title=model_name.split('/')[-1]) as demo:
             chatbot = gr.Chatbot(label="Ovis", layout="panel", height=600, show_copy_button=True, latex_delimiters=latex_delimiters_set)
             text_input.render()
             with gr.Row():
-                send_btn = gr.Button("Send")
-                clear_btn = gr.Button("Clear")
-    send_click_event = send_btn.click(
-        submit_chat,
-        inputs=[chatbot, text_input],
-        outputs=[chatbot, text_input]
-    ).then(
-        ovis_chat,
-        inputs=[chatbot, image_input],
-        outputs=chatbot
-    )
-    submit_event = text_input.submit(
-        submit_chat,
-        inputs=[chatbot, text_input],
-        outputs=[chatbot, text_input]
-    ).then(
-        ovis_chat,
-        inputs=[chatbot, image_input],
-        outputs=chatbot
-    )
     clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])
 demo.launch()

 import subprocess
 subprocess.run('pip install flash-attn==2.7.0.post2 --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+import spaces
 import os
 import re
 import logging
 import gradio as gr
 from transformers import AutoModelForCausalLM, TextIteratorStreamer
 model_name = 'AIDC-AI/Ovis2-8B'
 use_thread = False
+# load model
+model = AutoModelForCausalLM.from_pretrained(model_name,
+                                             torch_dtype=torch.bfloat16,
+                                             multimodal_max_length=8192,
+                                             trust_remote_code=True).to(device='cuda')
 text_tokenizer = model.get_text_tokenizer()
 visual_tokenizer = model.get_visual_tokenizer()
 streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)
 image_placeholder = '<image>'
 cur_dir = os.path.dirname(os.path.abspath(__file__))
+logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 def submit_chat(chatbot, text_input):
     """Queue the user's prompt in the chat history before generation runs.
     The prompt is appended to ``chatbot`` in place as a ``(prompt, '')``
     pair, i.e. with an empty response placeholder. Returns the updated
     history together with an empty string; the event wiring in this file
     routes the two values to ``[chatbot, text_input]``, so the empty
     string is what ends up in the prompt textbox.
     """
     # Placeholder response for the newly appended turn.
     response = ''
     chatbot.append((text_input, response))
+    return chatbot ,''
+@spaces.GPU
 def ovis_chat(chatbot: List[List[str]], image_input: Any):
     conversations, model_inputs = prepare_inputs(chatbot, image_input)
     gen_kwargs = initialize_gen_kwargs()
     log_conversation(chatbot)
 def prepare_inputs(chatbot: List[List[str]], image_input: Any):
+    # conversations = [{
+    #     "from": "system",
+    #     "value": "You are a helpful assistant, and your task is to provide reliable and structured responses to users."
+    # }]
+    conversations= []
     for query, response in chatbot[:-1]:
         conversations.extend([
             {"from": "human", "value": query},
     conversations.append({"from": "human", "value": last_query})
     if image_input is not None:
         for conv in conversations:
             if conv["from"] == "human":
                 conv["value"] = f'{image_placeholder}\n{conv["value"]}'
 def clear_chat():
     """Return the reset values used by the Clear button.
     The three values are, in order, an empty list, ``None`` and an empty
     string; the Clear button's click handler in this file sends them to
     ``[chatbot, image_input, text_input]`` respectively.
     """
     return [], None, ""
 with open(f"{cur_dir}/resource/logo.svg", "r", encoding="utf-8") as svg_file:
     svg_content = svg_file.read()
 font_size = "2.5em"
     <span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
     <span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
 </p>
+<center><font size=3><b>Ovis</b> has been open-sourced on <a href='https://huggingface.co/{model_name}'>😊 Huggingface</a> and <a href='https://github.com/AIDC-AI/Ovis'>🌟 GitHub</a>. If you find Ovis useful, a like❤️ or a star🌟 would be appreciated.</font></center>
 """
 latex_delimiters_set = [{
         "display": True
     }]
+text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)
+with gr.Blocks(title=model_name.split('/')[-1], theme=gr.themes.Ocean()) as demo:
     gr.HTML(html)
     with gr.Row():
         with gr.Column(scale=3):
+            image_input = gr.Image(label="image", height=350, type="pil")
             gr.Examples(
                 examples=[
+                    [f"{cur_dir}/examples/ovis2_math2.png", "Find the area of the shaded region."],
+                    [f"{cur_dir}/examples/ovis2_figure2.png", "What is net profit margin as a percentage of total revenue?"],
+                    [f"{cur_dir}/examples/ovis2_table0.png", "Convert the table to markdown."],
+                    [f"{cur_dir}/examples/ovis2_ocr0.jpeg", "OCR:"],
                 ],
                 inputs=[image_input, text_input]
             )
             chatbot = gr.Chatbot(label="Ovis", layout="panel", height=600, show_copy_button=True, latex_delimiters=latex_delimiters_set)
             text_input.render()
             with gr.Row():
+                send_btn = gr.Button("Send", variant="primary")
+                clear_btn = gr.Button("Clear", variant="secondary")
+    send_click_event = send_btn.click(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
+    submit_event = text_input.submit(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
     clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])
 demo.launch()