Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,904 Bytes
0ff6c39 cd26609 0ff6c39 cd26609 0813164 cd26609 0813164 cd26609 37ee1f3 cd26609 37ee1f3 cd26609 d554072 cd26609 d554072 cd26609 6e8312c 37ee1f3 d554072 0813164 37ee1f3 0813164 37ee1f3 0813164 37ee1f3 d554072 0813164 37ee1f3 d554072 6e8312c d554072 6e8312c 0ff6c39 d554072 3e4847c d554072 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import streamlit as st
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
import gc
# Catalogue of selectable GGUF models, keyed by the label offered in the
# "Select Model" dropdown. Each entry gives the Hugging Face repository
# ("repo_id"), the file to fetch from it ("filename"), and a display
# description ("description").
MODELS = {
    "Qwen2.5-7B-Instruct (Q2_K)": {
        "repo_id": "Qwen/Qwen2.5-7B-Instruct-GGUF",
        "filename": "qwen2.5-7b-instruct-q2_k.gguf",
        "description": "Qwen2.5-7B Instruct (Q2_K)",
    },
    "Gemma-3-4B-IT (Q4_K_M)": {
        "repo_id": "unsloth/gemma-3-4b-it-GGUF",
        "filename": "gemma-3-4b-it-Q4_K_M.gguf",
        "description": "Gemma 3 4B IT (Q4_K_M)",
    },
    "Phi-4-mini-Instruct (Q4_K_M)": {
        "repo_id": "unsloth/Phi-4-mini-instruct-GGUF",
        "filename": "Phi-4-mini-instruct-Q4_K_M.gguf",
        "description": "Phi-4 Mini Instruct (Q4_K_M)",
    },
    "Meta-Llama-3.1-8B-Instruct (Q2_K)": {
        "repo_id": "MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF",
        "filename": "Meta-Llama-3.1-8B-Instruct.Q2_K.gguf",
        "description": "Meta Llama 3.1 8B Instruct (Q2_K)",
    },
    "DeepSeek-R1-Distill-Llama-8B (Q2_K)": {
        "repo_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
        "filename": "DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf",
        "description": "DeepSeek R1 Distill Llama 8B (Q2_K)",
    },
    "Mistral-7B-Instruct-v0.3 (IQ3_XS)": {
        "repo_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
        "filename": "Mistral-7B-Instruct-v0.3.IQ3_XS.gguf",
        "description": "Mistral 7B Instruct v0.3 (IQ3_XS)",
    },
    "Qwen2.5-Coder-7B-Instruct (Q2_K)": {
        "repo_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
        "filename": "qwen2.5-coder-7b-instruct-q2_k.gguf",
        "description": "Qwen2.5 Coder 7B Instruct (Q2_K)",
    },
}
# Sidebar widgets: model picker, system prompt, and the numeric settings
# chosen by the user for this run of the script.
with st.sidebar:
    st.header("⚙️ Settings")
    selected_model_name = st.selectbox("Select Model", options=list(MODELS))
    system_prompt = st.text_area(
        "System Prompt",
        value="You are a helpful assistant.",
        height=80,
    )
    max_tokens = st.slider(
        "Max tokens", min_value=64, max_value=2048, value=512, step=32
    )
    temperature = st.slider(
        "Temperature", min_value=0.1, max_value=2.0, value=0.7
    )
    top_k = st.slider("Top-K", min_value=1, max_value=100, value=40)
    top_p = st.slider("Top-P", min_value=0.1, max_value=1.0, value=0.95)
    repeat_penalty = st.slider(
        "Repetition Penalty", min_value=1.0, max_value=2.0, value=1.1
    )

# Catalogue entry for the chosen model and the path its file has on disk.
selected_model = MODELS[selected_model_name]
model_path = os.path.join("models", selected_model["filename"])

# Create the local model directory if it is not there yet.
os.makedirs("models", exist_ok=True)
def cleanup_old_models():
    """Delete every ``.gguf`` file in ``models`` other than the selected one.

    A file that cannot be removed is reported through ``st.warning``; the
    remaining files are still processed.
    """
    for entry in os.listdir("models"):
        # Skip anything that is not a GGUF file, and keep the selected model.
        if not entry.endswith(".gguf") or entry == selected_model["filename"]:
            continue
        try:
            stale_path = os.path.join("models", entry)
            os.remove(stale_path)
        except Exception as err:
            st.warning(f"Couldn't delete old model {entry}: {err}")
def download_model():
    """Download the selected model file from the Hugging Face Hub.

    The file named by ``selected_model["filename"]`` is fetched from
    ``selected_model["repo_id"]`` into ``./models`` while a spinner is shown.
    """
    spinner_label = f"Downloading {selected_model['filename']}..."
    with st.spinner(spinner_label):
        request = {
            "repo_id": selected_model["repo_id"],
            "filename": selected_model["filename"],
            "local_dir": "./models",
            "local_dir_use_symlinks": False,
        }
        hf_hub_download(**request)
def validate_or_download_model():
    """Make sure a loadable copy of the selected model exists at ``model_path``.

    If the file is missing, other ``.gguf`` files are cleaned up and the model
    is downloaded. The file is then test-loaded with ``Llama``; if that raises,
    a warning is shown, the file is removed, and it is downloaded again. The
    fresh download is not test-loaded a second time.
    """
    if not os.path.exists(model_path):
        cleanup_old_models()
        download_model()
    try:
        # Tiny context and a single thread: the instance is only created to
        # check that the file loads, and is discarded straight away.
        _ = Llama(model_path=model_path, n_ctx=16, n_threads=1)
    except Exception as e:
        st.warning(f"Model file was invalid or corrupt: {e}\nRedownloading...")
        try:
            os.remove(model_path)
        except FileNotFoundError:
            # Nothing to delete; the download below recreates the file.
            pass
        except OSError as remove_err:
            # Still best-effort, but report the failure instead of hiding it.
            # A bare ``except`` here would also swallow KeyboardInterrupt and
            # SystemExit.
            st.warning(f"Couldn't delete invalid model file: {remove_err}")
        cleanup_old_models()
        download_model()
# Runs at module level, i.e. each time this script executes: make sure the
# selected model file is on disk and loadable before the loading step below.
validate_or_download_model()
# Load model if changed
if "model_name" not in st.session_state or st.session_state.model_name != selected_model_name:
if "llm" in st.session_state and st.session_state.llm
|