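# app.py -- Gradio chatbot for a Hugging Face Space.
# Streams chat completions from a user-selected model via huggingface_hub's InferenceClient.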
import gradio as gr
from huggingface_hub import InferenceClient
import spaces  # spaces 0.32.0; provides the @spaces.GPU decorator
import torch
import os
import platform
duration = None  # GPU allocation for @spaces.GPU; None uses the default
token = os.getenv('deepseekv2')  # HF access token stored as a Space secret
provider = None  # inference provider, e.g. 'fal-ai', 'replicate', 'sambanova'; None selects the default
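# Log the runtime environment at startup (a captured sample is in the docstring below).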
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Python version: {platform.python_version()}")
print(f"Pytorch version: {torch.__version__}")
print(f"Gradio version: {gr. __version__}")
# print(f"HFhub version: {huggingface_hub.__version__}")
"""
Sample startup output on this Space:
Is CUDA available: True
CUDA device: NVIDIA A100-SXM4-80GB MIG 3g.40gb
CUDA version: 12.1
Python version: 3.10.13
Pytorch version: 2.4.0+cu121
Gradio version: 5.0.1
"""
def choose_model(model_name):
    # Map the dropdown labels to Hugging Face model repo ids.
    models = {
        "DeepSeek-R1-Distill-Qwen-1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "DeepSeek-R1-Distill-Qwen-32B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "Llama3-8b-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
        "Llama3.1-8b-Instruct": "meta-llama/Llama-3.1-8B-Instruct",
        "Llama2-13b-chat": "meta-llama/Llama-2-13b-chat-hf",
        "Gemma-2-2b": "google/gemma-2-2b-it",
        "Gemma-7b": "google/gemma-7b",
        "Mixtral-8x7B-Instruct": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "Microsoft-phi-2": "microsoft/phi-2",
        "Qwen2.5-Coder-32B-Instruct": "Qwen/Qwen2.5-Coder-32B-Instruct",
    }
    # Default to zephyr if no (or an unrecognized) model is chosen.
    return models.get(model_name, "HuggingFaceH4/zephyr-7b-beta")
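# `respond` runs on GPU via the `spaces` decorator; duration=None keeps the default
# time allocation. It is a generator, so gr.ChatInterface streams its output.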
@spaces.GPU(duration=duration)
def respond(message, history: list[tuple[str, str]], model, system_message, max_tokens, temperature, top_p):
    print(f"Selected model: {model}")
    model_name = choose_model(model)
    client = InferenceClient(model_name, provider=provider, token=token)

    # Rebuild the full conversation: system prompt, then alternating user/assistant turns.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Stream tokens and yield the growing response so the UI updates incrementally.
    response = ""
    for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
        delta = chunk.choices[0].delta.content
        response += delta or ""  # the final chunk's delta can be None
        yield response
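# Chat UI: the model picker, system prompt, and sampling controls appear under
# the "Additional Inputs" accordion of gr.ChatInterface.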
demo = gr.ChatInterface(
    respond,
    title="Ask me anything",
    description="Hi there! I am your friendly AI chatbot. Choose from different language models under the Additional Inputs tab below.",
    examples=[["Explain quantum computing"], ["Explain forex trading"], ["What is the capital of China?"], ["Make a poem about nature"]],
    additional_inputs=[
        gr.Dropdown(["DeepSeek-R1-Distill-Qwen-1.5B", "DeepSeek-R1-Distill-Qwen-32B", "Gemma-2-2b", "Gemma-7b", "Llama2-13b-chat", "Llama3-8b-Instruct", "Llama3.1-8b-Instruct", "Microsoft-phi-2", "Mixtral-8x7B-Instruct", "Qwen2.5-Coder-32B-Instruct", "Zephyr-7b-beta"], label="Select Model"),
        gr.Textbox(value="You are a friendly and helpful Chatbot, be concise and straight to the point, avoid excessive reasoning.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
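# Note: share=True opens a public gradio.live tunnel when run locally; on Spaces
# the app is already served publicly, so Gradio ignores the flag there.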
if __name__ == "__main__":
    demo.launch(share=True)