simonraj committed on
Commit
dd2a3b9
·
verified ·
1 Parent(s): f405eaa

Create app.py

Files changed (1)
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
+ import subprocess
+ # Installing flash_attn
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+ import gradio as gr
+ from PIL import Image
+ from transformers import AutoModelForCausalLM
+ from transformers import AutoProcessor
+ from transformers import TextIteratorStreamer
+ import time
+ from threading import Thread
+ import torch
+ import spaces
+
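+ # Load Phi-3-vision and its processor onto the GPU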
+ model_id = "microsoft/Phi-3-vision-128k-instruct"
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", trust_remote_code=True, torch_dtype="auto")
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+ model.to("cuda:0")
+
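+ # HTML placeholder shown in the empty chatbot before the first message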
+ PLACEHOLDER = """
+ <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+ <img src="https://cdn-thumbnails.huggingface.co/social-thumbnails/models/microsoft/Phi-3-vision-128k-instruct.png" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55;">
+ <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Fitness Coach: Arnold Style</h1>
+ <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Upload your exercise photo and get short, powerful coaching tips from the best!</p>
+ </div>
+ """
+
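+ # Chat handler: resolves the uploaded image, rebuilds the conversation, and streams the model's reply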
+ @spaces.GPU
+ def bot_streaming(message, history):
+     print(f'message is - {message}')
+     print(f'history is - {history}')
+     if message["files"]:
+         if type(message["files"][-1]) == dict:
+             image = message["files"][-1]["path"]
+         else:
+             image = message["files"][-1]
+     else:
+         for hist in history:
+             if type(hist[0]) == tuple:
+                 image = hist[0][0]
+     try:
+         if image is None:
+             raise gr.Error("You need to upload an image for Phi3-Vision to work. Close the error and try again with an image.")
+     except NameError:
+         raise gr.Error("You need to upload an image for Phi3-Vision to work. Close the error and try again with an image.")
+
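+     # Rebuild the multi-turn conversation in chat-template format from the Gradio history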
+     conversation = []
+     flag = False
+     for user, assistant in history:
+         if assistant is None:
+             flag = True
+             conversation.extend([{"role": "user", "content": ""}])
+             continue
+         if flag == True:
+             conversation[0]['content'] = f"<|image_1|>\n{user}"
+             conversation.extend([{"role": "assistant", "content": assistant}])
+             flag = False
+             continue
+         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+
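+     # Prefix the first user turn with <|image_1|> so the prompt references the uploaded image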
+     if len(history) == 0:
+         conversation.append({"role": "user", "content": f"<|image_1|>\n{message['text']}"})
+     else:
+         conversation.append({"role": "user", "content": message['text']})
+     print(f"prompt is -\n{conversation}")
+     prompt = processor.tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
+     image = Image.open(image)
+     inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
+
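+     # Generate in a background thread and stream partial text back to the UI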
+     streamer = TextIteratorStreamer(processor, **{"skip_special_tokens": True, "skip_prompt": True, 'clean_up_tokenization_spaces': False,})
+     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=280, do_sample=False, temperature=0.0, eos_token_id=processor.tokenizer.eos_token_id,)
+
+     thread = Thread(target=model.generate, kwargs=generation_kwargs)
+     thread.start()
+
+     buffer = ""
+     for new_text in streamer:
+         buffer += new_text
+         yield buffer
+
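+ # Gradio UI: multimodal chat interface with image-upload examples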
+ chatbot = gr.Chatbot(scale=1, placeholder=PLACEHOLDER)
+ chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
+ with gr.Blocks(fill_height=True,) as demo:
+     gr.ChatInterface(
+         fn=bot_streaming,
+         title="Fitness Coach: Arnold Style",
+         examples=[
+             {"text": "Identify and provide coaching cues for this exercise.", "files": ["./squat.jpg"]},
+             {"text": "What improvements can I make?", "files": ["./pushup.jpg"]},
+             {"text": "How is my form?", "files": ["./plank.jpg"]},
+             {"text": "Give me some tips to improve my deadlift.", "files": ["./deadlift.jpg"]}
+         ],
+         description="Upload an image of your exercise, and the fitness coach will identify the exercise and provide concise coaching cues to improve your form. Responses are limited to 280 new tokens.",
+         stop_btn="Stop Generation",
+         multimodal=True,
+         textbox=chat_input,
+         chatbot=chatbot,
+         cache_examples=False,
+         examples_per_page=3
+     )
+
+ demo.queue()
+ demo.launch(debug=True, quiet=True)