runninglsy committed on
Commit
fc49e89
·
1 Parent(s): 95b4f87

update chat

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
  title: Ovis2 16B
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.15.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
1
  ---
2
  title: Ovis2 16B
3
+ emoji: 🦫
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.1.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import spaces
2
+ import os
3
+ import re
4
+ import time
5
+ import gradio as gr
6
+ import torch
7
+ from transformers import AutoModelForCausalLM
8
+ from transformers import TextIteratorStreamer
9
+ from threading import Thread
10
+
11
+ model_name = 'AIDC-AI/Ovis2-16B'
12
+
13
+ # load model
14
+ model = AutoModelForCausalLM.from_pretrained(model_name,
15
+ torch_dtype=torch.bfloat16,
16
+ multimodal_max_length=8192,
17
+ trust_remote_code=True).to(device='cuda')
18
+ text_tokenizer = model.get_text_tokenizer()
19
+ visual_tokenizer = model.get_visual_tokenizer()
20
+ streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)
21
+ image_placeholder = '<image>'
22
+ cur_dir = os.path.dirname(os.path.abspath(__file__))
23
+
24
def submit_chat(chatbot, text_input):
    """Queue the user's prompt as a new, not-yet-answered chat turn.

    Args:
        chatbot: Chat history, a list of ``[query, response]`` pairs. It is
            extended in place.
        text_input: The prompt text the user just submitted.

    Returns:
        A ``(chatbot, "")`` tuple: the extended history and an empty string,
        which the caller wires to the prompt textbox to clear it.
    """
    # Store the turn as a mutable list (not a tuple) so the response slot can
    # be filled in later with ``chatbot[-1][1] = ...``.
    chatbot.append([text_input, ''])
    return chatbot, ''
28
+
29
# The GPU decorator comes from the optional `spaces` package. Fall back to a
# no-op decorator when it is not importable so the module still loads.
# NOTE(review): on ZeroGPU hardware `spaces` is expected to be imported before
# CUDA is touched; if deploying there, also restore the top-of-file import.
try:
    import spaces
    _gpu = spaces.GPU
except ImportError:
    def _gpu(func):
        """No-op stand-in for ``spaces.GPU``."""
        return func


@_gpu
def ovis_chat(chatbot, image_input):
    """Stream the model's answer to the newest turn of the chat history.

    Args:
        chatbot: Chat history as ``[query, response]`` pairs. The last pair
            holds the pending query; its response slot is overwritten as
            text is generated.
        image_input: Image handed to ``model.preprocess_inputs``, or ``None``
            for a text-only turn.

    Yields:
        The chat history, with the last response growing as new text arrives.

    Raises:
        Exception: Re-raises whatever ``model.generate`` raised in the worker
            thread, after the stream has been drained.
    """
    pending_query = chatbot[-1][0]

    # Rebuild the conversation: system prompt, finished turns, then the new query.
    conversations = [{
        "from": "system",
        "value": "You are Ovis, a multimodal large language model developed by Alibaba International, and your task is to provide reliable and structured responses to users. 你是Ovis,由阿里国际研发的多模态大模型,你的任务是为用户提供可靠、结构化的回复。"
    }]
    for query, answer in chatbot[:-1]:
        conversations.append({"from": "human", "value": query})
        conversations.append({"from": "gpt", "value": answer})

    # Strip user-typed placeholders; the placeholder is added once below.
    text_input = pending_query.replace(image_placeholder, '')
    conversations.append({"from": "human", "value": text_input})

    if image_input is not None:
        # NOTE(review): this prepends the placeholder to the first message,
        # which is the system prompt here — confirm that is intended.
        conversations[0]["value"] = image_placeholder + '\n' + conversations[0]["value"]

    prompt, input_ids, pixel_values = model.preprocess_inputs(conversations, [image_input])
    attention_mask = torch.ne(input_ids, text_tokenizer.pad_token_id)
    input_ids = input_ids.unsqueeze(0).to(device=model.device)
    attention_mask = attention_mask.unsqueeze(0).to(device=model.device)
    if image_input is None:
        pixel_values = [None]
    else:
        pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)]

    # One streamer per call: a shared module-level streamer would interleave
    # tokens from concurrent requests.
    reply_streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        inputs=input_ids,
        pixel_values=pixel_values,
        attention_mask=attention_mask,
        streamer=reply_streamer,
        max_new_tokens=1536,
        do_sample=False,
        top_p=None,
        top_k=None,
        temperature=None,
        repetition_penalty=None,
        eos_token_id=model.generation_config.eos_token_id,
        pad_token_id=text_tokenizer.pad_token_id,
        use_cache=True,
    )
    failures = []

    def _run_generation():
        """Run generation in the worker thread and always terminate the stream."""
        try:
            # inference_mode is thread-local, so it must be entered in the
            # thread that actually runs generate().
            with torch.inference_mode():
                model.generate(**generate_kwargs)
        except Exception as exc:
            failures.append(exc)
            # Unblock the consumer loop; otherwise it would wait forever.
            reply_streamer.end()

    thread = Thread(target=_run_generation)
    thread.start()
    response = ""
    for new_text in reply_streamer:
        response += new_text
        # Replace the whole pair so this also works if the turn was a tuple.
        chatbot[-1] = [pending_query, response]
        yield chatbot
    thread.join()
    if failures:
        raise failures[0]

    # debug
    print('*'*60)
    print('*'*60)
    print('OVIS_CONV_START')
    for i, (request, answer) in enumerate(chatbot[:-1], 1):
        print(f'Q{i}:\n {request}')
        print(f'A{i}:\n {answer}')
    print('New_Q:\n', text_input)
    print('New_A:\n', response)
    print('OVIS_CONV_END')
98
+
99
def clear_chat():
    """Return the reset values for the chat UI.

    Returns:
        A ``([], None, "")`` tuple: an empty chat history, no image, and an
        empty prompt string, in the order the clear button's outputs expect.
    """
    return [], None, ""
101
+
102
+ with open(f"{cur_dir}/resource/logo.svg", "r", encoding="utf-8") as svg_file:
103
+ svg_content = svg_file.read()
104
+ font_size = "2.5em"
105
+ svg_content = re.sub(r'(<svg[^>]*)(>)', rf'\1 height="{font_size}" style="vertical-align: middle; display: inline-block;"\2', svg_content)
106
+ html = f"""
107
+ <p align="center" style="font-size: {font_size}; line-height: 1;">
108
+ <span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
109
+ <span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
110
+ </p>
111
+ <center><font size=3><b>Ovis</b> has been open-sourced on <a href='https://huggingface.co/{model_name}'>😊 Huggingface</a> and <a href='https://github.com/AIDC-AI/Ovis'>🌟 GitHub</a>. If you find Ovis useful, a like❤️ or a star🌟 would be appreciated.</font></center>
112
+ """
113
+
114
+ latex_delimiters_set = [{
115
+ "left": "\\(",
116
+ "right": "\\)",
117
+ "display": True
118
+ }, {
119
+ "left": "\\begin{equation}",
120
+ "right": "\\end{equation}",
121
+ "display": True
122
+ }, {
123
+ "left": "\\begin{align}",
124
+ "right": "\\end{align}",
125
+ "display": True
126
+ }, {
127
+ "left": "\\begin{alignat}",
128
+ "right": "\\end{alignat}",
129
+ "display": True
130
+ }, {
131
+ "left": "\\begin{gather}",
132
+ "right": "\\end{gather}",
133
+ "display": True
134
+ }, {
135
+ "left": "\\begin{CD}",
136
+ "right": "\\end{CD}",
137
+ "display": True
138
+ }, {
139
+ "left": "\\[",
140
+ "right": "\\]",
141
+ "display": True
142
+ }]
143
+
144
+ text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)
145
+ with gr.Blocks(title=model_name.split('/')[-1], theme=gr.themes.Ocean()) as demo:
146
+ gr.HTML(html)
147
+ with gr.Row():
148
+ with gr.Column(scale=3):
149
+ image_input = gr.Image(label="image", height=350, type="pil")
150
+ gr.Examples(
151
+ examples=[
152
+ [f"{cur_dir}/examples/case0.png", "Find the area of the shaded region."],
153
+ [f"{cur_dir}/examples/case1.png", "explain this model to me."],
154
+ [f"{cur_dir}/examples/case2.png", "What is net profit margin as a percentage of total revenue?"],
155
+ ],
156
+ inputs=[image_input, text_input]
157
+ )
158
+ with gr.Column(scale=7):
159
+ chatbot = gr.Chatbot(label="Ovis", layout="panel", height=600, show_copy_button=True, latex_delimiters=latex_delimiters_set)
160
+ text_input.render()
161
+ with gr.Row():
162
+ send_btn = gr.Button("Send", variant="primary")
163
+ clear_btn = gr.Button("Clear", variant="secondary")
164
+
165
+ send_click_event = send_btn.click(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
166
+ submit_event = text_input.submit(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
167
+ clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])
168
+
169
+ demo.launch()
examples/case0.png ADDED

Git LFS Details

  • SHA256: 6c58d5fb14f9be6f18b841e707e73dd750bae4a5a0c729ee668313dea43fbef4
  • Pointer size: 131 Bytes
  • Size of remote file: 128 kB
examples/case1.png ADDED

Git LFS Details

  • SHA256: 80bebf1106831041eaa9baef86d12d443360d5f4e5dd37795d841658853b44fc
  • Pointer size: 132 Bytes
  • Size of remote file: 2.84 MB
examples/case2.png ADDED

Git LFS Details

  • SHA256: ec9e80cf2885022c8fd6120b9ecb5a11907c6af15a8e89bdc7e6f891ca618b1e
  • Pointer size: 131 Bytes
  • Size of remote file: 232 kB
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ numpy==1.25.0
2
+ torch==2.4.0
3
+ transformers==4.46.2
4
+ pillow==10.3.0
5
+ # https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
6
+ # https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu11torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
resource/logo.svg ADDED