########################################
# app.py
########################################
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Cache the pipeline so each selected model is loaded from the Hub only once per session.
@st.cache_resource
def load_text_generation_pipeline(model_name: str):
"""
Loads a text-generation pipeline from the Hugging Face Hub.
"""
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
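    # Note: device_map="auto" requires the `accelerate` package to be installed
    # alongside transformers.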
model = AutoModelForCausalLM.from_pretrained(
model_name,
        torch_dtype="auto",  # "auto" infers a dtype; torch.float16 would also need `import torch`
device_map="auto" # automatically map layers to available GPU(s)
)
text_generation = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer
)
return text_generation
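
# Hedged sketch (assumptions: a CUDA GPU and the `bitsandbytes` + `accelerate`
# packages installed): checkpoints this large are often impractical to load in
# full precision, so 4-bit quantization is one way to fit them on a single GPU.
# `load_quantized_model` is a hypothetical helper, not wired into the app below.
from transformers import BitsAndBytesConfig

def load_quantized_model(model_name: str):
    bnb_config = BitsAndBytesConfig(load_in_4bit=True)  # quantize weights to 4-bit on load
    return AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto"
    )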
def generate_response(
text_generation,
system_prompt: str,
conversation_history: list,
user_query: str,
max_new_tokens: int,
temperature: float,
top_p: float
):
"""
Generates a response from the language model given the system prompt,
conversation history, and user query with specified parameters.
"""
    # Construct a prompt that includes the system role, the conversation history,
    # and the new user input. Adjust this to match your model's expected
    # instruction format; here we use a simple turn-by-turn "User:/Assistant:" layout.
full_prompt = system_prompt.strip()
for (speaker, text) in conversation_history:
if speaker == "user":
full_prompt += f"\nUser: {text}"
else:
full_prompt += f"\nAssistant: {text}"
# Add the new user query
full_prompt += f"\nUser: {user_query}\nAssistant:"
    # Use the pipeline to generate text. return_full_text=False makes the
    # pipeline return only the newly generated continuation, not the echoed prompt.
    outputs = text_generation(
        full_prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        return_full_text=False
    )
    # The pipeline returns a list of generated sequences; take the first one.
    # With return_full_text=False, "generated_text" holds only the model's answer.
    answer = outputs[0]["generated_text"]
    # If the model starts hallucinating the next user turn, cut it off there.
    answer = answer.split("\nUser:")[0].strip()
    return answer
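
# A hedged alternative to the manual prompt assembly above: chat-tuned checkpoints
# usually ship a chat template with their tokenizer, and letting the tokenizer
# render the turns is generally more faithful than hand-built "User:/Assistant:"
# strings. `build_chat_prompt` is a sketch and assumes the tokenizer defines a
# chat template; the role names follow the convention used by transformers.
def build_chat_prompt(tokenizer, system_prompt: str, conversation_history: list, user_query: str) -> str:
    messages = [{"role": "system", "content": system_prompt}]
    for speaker, text in conversation_history:
        messages.append({"role": "user" if speaker == "user" else "assistant", "content": text})
    messages.append({"role": "user", "content": user_query})
    # add_generation_prompt=True appends the assistant-turn marker so the model
    # continues as the assistant instead of echoing the user.
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)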
def main():
st.title("Streamlit Chatbot with Model Selection")
    st.markdown(
        """
        **System message**: You are a friendly Chatbot created by [ruslanmv.com](https://ruslanmv.com)

        Use the sidebar to select a model and adjust generation parameters, then start chatting below!
        """
    )
# Sidebar for model selection and parameters
st.sidebar.header("Select Model & Parameters")
model_name = st.sidebar.selectbox(
"Choose a model:",
[
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"deepseek-ai/DeepSeek-R1",
"deepseek-ai/DeepSeek-R1-Zero"
]
)
max_new_tokens = st.sidebar.slider(
"Max new tokens",
min_value=1,
max_value=4000,
value=1024,
step=1
)
temperature = st.sidebar.slider(
"Temperature",
min_value=0.1,
max_value=4.0,
value=1.0,
step=0.1
)
top_p = st.sidebar.slider(
"Top-p (nucleus sampling)",
min_value=0.1,
max_value=1.0,
value=0.9,
step=0.05
)
# The system "role" content
system_message = (
"You are a friendly Chatbot created by ruslanmv.com. "
"You answer user questions in a concise and helpful way."
)
# Load the chosen model
text_generation_pipeline = load_text_generation_pipeline(model_name)
# We'll keep conversation history in session_state
if "conversation" not in st.session_state:
st.session_state["conversation"] = [] # List of tuples (speaker, text)
# Display conversation so far
# Each element in st.session_state["conversation"] is ("user" or "assistant", message_text)
for speaker, text in st.session_state["conversation"]:
if speaker == "user":
st.markdown(f"<div style='text-align:left; color:blue'><strong>User:</strong> {text}</div>", unsafe_allow_html=True)
else:
st.markdown(f"<div style='text-align:left; color:green'><strong>Assistant:</strong> {text}</div>", unsafe_allow_html=True)
# User input text box
user_input = st.text_input("Your message", "")
# When user hits "Send"
if st.button("Send"):
if user_input.strip():
# 1) Add user query to conversation
st.session_state["conversation"].append(("user", user_input.strip()))
# 2) Generate a response
with st.spinner("Thinking..."):
answer = generate_response(
text_generation=text_generation_pipeline,
system_prompt=system_message,
                    conversation_history=st.session_state["conversation"][:-1],  # exclude the turn just appended; it is passed separately as user_query
user_query=user_input.strip(),
max_new_tokens=max_new_tokens,
temperature=temperature,
top_p=top_p
)
# 3) Add assistant answer to conversation
st.session_state["conversation"].append(("assistant", answer))
# 4) Rerun to display
            st.rerun()  # st.experimental_rerun() was removed in newer Streamlit releases
# Optional: Provide a button to clear the conversation
if st.button("Clear Conversation"):
st.session_state["conversation"] = []
        st.rerun()
if __name__ == "__main__":
main()
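
# To run this app locally (assuming Streamlit is installed):
#   streamlit run app.py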