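# app.py -- Gradio chatbot for a Hugging Face Space.
# Streams chat completions from a user-selected model via huggingface_hub's InferenceClient.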
import gradio as gr
from huggingface_hub import InferenceClient
import spaces  # spaces 0.32.0; provides the @spaces.GPU decorator
import torch
import os
import platform
duration = None  # GPU allocation for @spaces.GPU; None uses the default
token = os.getenv('deepseekv2')  # HF access token stored as a Space secret
provider = None  # inference provider, e.g. 'fal-ai', 'replicate', 'sambanova'; None selects the default
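# Log the runtime environment at startup (a captured sample is in the docstring below).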
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Python version: {platform.python_version()}")
print(f"Pytorch version: {torch.__version__}")
print(f"Gradio version: {gr. __version__}")
# print(f"HFhub version: {huggingface_hub.__version__}")
"""
Sample startup output on this Space:
Is CUDA available: True
CUDA device: NVIDIA A100-SXM4-80GB MIG 3g.40gb
CUDA version: 12.1
Python version: 3.10.13
Pytorch version: 2.4.0+cu121
Gradio version: 5.0.1
"""
def choose_model(model_name):
    # Map the dropdown labels to Hugging Face model repo ids.
    models = {
        "DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "DeepSeek-R1-Distill-Qwen-32B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "Llama3-8b-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
        "Llama3.1-8b-Instruct": "meta-llama/Llama-3.1-8B-Instruct",
        "Llama2-13b-chat": "meta-llama/Llama-2-13b-chat-hf",
        "Gemma-2-2b": "google/gemma-2-2b-it",
        "Gemma-7b": "google/gemma-7b",
        "Mixtral-8x7B-Instruct": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "Microsoft-phi-2": "microsoft/phi-2",
        "Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
    }
    # Default to zephyr if no (or an unrecognized) model is chosen.
    return models.get(model_name, "HuggingFaceH4/zephyr-7b-beta")
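# `respond` runs on GPU via the `spaces` decorator; duration=None keeps the default
# time allocation. It is a generator, so gr.ChatInterface streams its output.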
@spaces.GPU(duration=duration)
def respond(message, history: list[tuple[str, str]], model, system_message, max_tokens, temperature, top_p):
    print(f"Selected model: {model}")
    model_name = choose_model(model)
    client = InferenceClient(model_name, provider=provider, token=token)

    # Rebuild the full conversation: system prompt, then alternating user/assistant turns.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Stream tokens and yield the growing response so the UI updates incrementally.
    response = ""
    for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
        delta = chunk.choices[0].delta.content
        response += delta or ""  # the final chunk's delta can be None
        yield response
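# Chat UI: the model picker, system prompt, and sampling controls appear under
# the "Additional Inputs" accordion of gr.ChatInterface.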
demo = gr.ChatInterface(
    respond,
    title="Ask me anything",
    description="Hi there! I am your friendly AI chatbot. Choose from different language models under the Additional Inputs tab below.",
    examples=[["Explain quantum computing"], ["Explain forex trading"], ["What is the capital of China?"], ["Make a poem about nature"]],
    additional_inputs=[
        gr.Dropdown(["DeepSeek-R1-Distill-Qwen-1.5B", "DeepSeek-R1-Distill-Qwen-32B", "Gemma-2-2b", "Gemma-7b", "Llama2-13b-chat", "Llama3-8b-Instruct", "Llama3.1-8b-Instruct", "Microsoft-phi-2", "Mixtral-8x7B-Instruct", "Qwen2.5-Coder-32B-Instruct", "Zephyr-7b-beta"], label="Select Model"),
        gr.Textbox(value="You are a friendly and helpful Chatbot, be concise and straight to the point, avoid excessive reasoning.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
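# Note: share=True opens a public gradio.live tunnel when run locally; on Spaces
# the app is already served publicly, so Gradio ignores the flag there.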
if __name__ == "__main__":
    demo.launch(share=True)