Spaces:
Running
Running
File size: 4,061 Bytes
9ab0176 539566d 1472595 f70fc29 9253654 ef99990 9ab0176 1472595 ef99990 9ab0176 ef99990 1472595 9122113 9ab0176 1472595 9122113 d9faa8c ef99990 1472595 ef99990 1472595 9ab0176 f70fc29 9ab0176 1472595 f70fc29 9ab0176 c2dfdca 9ab0176 f70fc29 9ab0176 f70fc29 9ab0176 f70fc29 9ab0176 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import streamlit as st
from functools import lru_cache
import requests
# Cache URL construction so repeated lookups return the same cached string.
@lru_cache(maxsize=3)
def load_hf_model(model_name, org="deepseek-ai"):
    """Build the Hugging Face Inference API endpoint URL for a hosted model.

    Args:
        model_name: Repository name of the model on the Hub
            (e.g. "DeepSeek-R1").
        org: Hub organization that owns the model. Defaults to
            "deepseek-ai" so existing single-argument callers are unchanged.

    Returns:
        The fully-qualified Inference API URL as a string.
    """
    return f"https://api-inference.huggingface.co/models/{org}/{model_name}"
# Resolve the API endpoint for every supported model once at startup.
_MODEL_NAMES = (
    "DeepSeek-R1-Distill-Qwen-32B",
    "DeepSeek-R1",
    "DeepSeek-R1-Zero",
)
MODELS = {name: load_hf_model(name) for name in _MODEL_NAMES}
# --- Chatbot function ---
def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
    """Send the user's message to the selected DeepSeek model and record the exchange.

    Args:
        input_text: The user's message.
        history: List of (user, assistant) tuples; a new list is created when
            None or empty is passed.
        model_choice: Key into MODELS selecting which endpoint to call.
        system_message: System prompt prepended to the request.
        max_new_tokens: Cap on generated tokens, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.
        top_p: Nucleus-sampling cutoff, forwarded to the API.

    Returns:
        The updated history list (the passed-in list is mutated in place).
    """
    history = history or []
    # Get the selected model API URL
    api_url = MODELS[model_choice]
    # The serverless Inference API's text-generation task expects a prompt
    # string under "inputs" and sampling settings under "parameters"; the
    # previous nested {"inputs": {"messages": ...}} shape is not a documented
    # format and would be rejected or misread by the endpoint.
    payload = {
        "inputs": f"{system_message}\n\nUser: {input_text}\nAssistant:",
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "return_full_text": False,
        },
    }
    # Run inference using the selected model
    try:
        headers = {"Authorization": f"Bearer {st.secrets['HUGGINGFACE_TOKEN']}"}
        # A timeout keeps a stalled request (e.g. a cold-starting model) from
        # hanging the Streamlit script run forever.
        response = requests.post(api_url, headers=headers, json=payload, timeout=120).json()
        # Handle the response format
        if isinstance(response, list) and len(response) > 0:
            # Typical success shape: a list of {"generated_text": ...} dicts.
            assistant_response = response[0].get("generated_text", "No response generated.")
        elif isinstance(response, dict) and "generated_text" in response:
            # Some endpoints return a single dict with generated_text.
            assistant_response = response["generated_text"]
        elif isinstance(response, dict) and "error" in response:
            # The API reports failures (model loading, bad token, rate limit)
            # as {"error": ...}; surface that instead of a generic message.
            assistant_response = f"API error: {response['error']}"
        else:
            assistant_response = "Unexpected model response format."
    except Exception as e:
        # Broad catch is deliberate: any network/JSON failure becomes a chat
        # message rather than crashing the app.
        assistant_response = f"Error: {str(e)}"
    # Append user and assistant messages to history
    history.append((input_text, assistant_response))
    return history
# --- Streamlit App ---
st.set_page_config(page_title="DeepSeek Chatbot", page_icon="🤖", layout="wide")

# Page header: title plus a short usage blurb.
st.title("DeepSeek Chatbot")

_DESCRIPTION = """
Created by [ruslanmv.com](https://ruslanmv.com/)
This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
"""
st.markdown(_DESCRIPTION)
# Sidebar for model selection and parameters
with st.sidebar:
    st.header("Options")
    # Which DeepSeek endpoint to call; defaults to the first MODELS entry.
    model_choice = st.radio(
        "Choose a Model",
        options=list(MODELS.keys()),
        index=0
    )
    st.header("Optional Parameters")
    # System prompt forwarded to the model on every request.
    system_message = st.text_area(
        "System Message",
        value="You are a friendly Chatbot created by ruslanmv.com",
        height=100
    )
    # Upper bound on the number of tokens generated per reply.
    max_new_tokens = st.slider(
        "Max New Tokens",
        min_value=1,
        max_value=4000,
        value=200
    )
    # Sampling temperature: higher values produce more random output.
    temperature = st.slider(
        "Temperature",
        min_value=0.10,
        max_value=4.00,
        value=0.70
    )
    # Nucleus-sampling cutoff: probability mass considered when sampling.
    top_p = st.slider(
        "Top-p (nucleus sampling)",
        min_value=0.10,
        max_value=1.00,
        value=0.90
    )
# Make sure the per-session transcript exists before rendering it.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Replay the stored transcript as alternating user/assistant chat bubbles.
for exchange in st.session_state.chat_history:
    user_text, assistant_text = exchange
    with st.chat_message("user"):
        st.write(user_text)
    with st.chat_message("assistant"):
        st.write(assistant_text)
# Read the next user message; st.chat_input returns None until submitted.
user_input = st.chat_input("Type your message here...")

if user_input:
    # Run inference, store the updated transcript, then rerun the script so
    # the new exchange is drawn by the history loop above.
    updated_history = chatbot(
        user_input,
        st.session_state.chat_history,
        model_choice,
        system_message,
        max_new_tokens,
        temperature,
        top_p,
    )
    st.session_state.chat_history = updated_history
    st.rerun()