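"""Streamlit app that lets the user pick a GGUF chat model, downloads it from
the Hugging Face Hub on demand, and runs it locally with llama-cpp-python.

Only one .gguf file is kept in the local models/ directory at a time; stale
files are deleted before a new model is fetched.
"""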
import streamlit as st
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
import gc

# Available models
MODELS = {
    "Qwen2.5-7B-Instruct (Q2_K)": {
        "repo_id": "Qwen/Qwen2.5-7B-Instruct-GGUF",
        "filename": "qwen2.5-7b-instruct-q2_k.gguf",
        "description": "Qwen2.5-7B Instruct (Q2_K)"
    },
    "Gemma-3-4B-IT (Q4_K_M)": {
        "repo_id": "unsloth/gemma-3-4b-it-GGUF",
        "filename": "gemma-3-4b-it-Q4_K_M.gguf",
        "description": "Gemma 3 4B IT (Q4_K_M)"
    },
    "Phi-4-mini-Instruct (Q4_K_M)": {
        "repo_id": "unsloth/Phi-4-mini-instruct-GGUF",
        "filename": "Phi-4-mini-instruct-Q4_K_M.gguf",
        "description": "Phi-4 Mini Instruct (Q4_K_M)"
    },
    "Meta-Llama-3.1-8B-Instruct (Q2_K)": {
        "repo_id": "MaziyarPanahi/Meta-Llama-3.1-8B-Instruct-GGUF",
        "filename": "Meta-Llama-3.1-8B-Instruct.Q2_K.gguf",
        "description": "Meta Llama 3.1 8B Instruct (Q2_K)"
    },
    "DeepSeek-R1-Distill-Llama-8B (Q2_K)": {
        "repo_id": "unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF",
        "filename": "DeepSeek-R1-Distill-Llama-8B-Q2_K.gguf",
        "description": "DeepSeek R1 Distill Llama 8B (Q2_K)"
    },
    "Mistral-7B-Instruct-v0.3 (IQ3_XS)": {
        "repo_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
        "filename": "Mistral-7B-Instruct-v0.3.IQ3_XS.gguf",
        "description": "Mistral 7B Instruct v0.3 (IQ3_XS)"
    },
    "Qwen2.5-Coder-7B-Instruct (Q2_K)": {
        "repo_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
        "filename": "qwen2.5-coder-7b-instruct-q2_k.gguf",
        "description": "Qwen2.5 Coder 7B Instruct (Q2_K)"
    },
}
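
# To expose another model, add an entry keyed by its display name, giving the
# Hub repo id and the exact GGUF filename inside that repo. Hypothetical
# example (repo id and filename below are placeholders, not a real checkpoint):
#
# "Example-7B-Instruct (Q4_K_M)": {
#     "repo_id": "example-org/Example-7B-Instruct-GGUF",
#     "filename": "Example-7B-Instruct.Q4_K_M.gguf",
#     "description": "Example 7B Instruct (Q4_K_M)",
# },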

# Sidebar for model selection and settings
with st.sidebar:
    st.header("⚙️ Settings")
    selected_model_name = st.selectbox("Select Model", list(MODELS.keys()))
    system_prompt = st.text_area("System Prompt", value="You are a helpful assistant.", height=80)
    max_tokens = st.slider("Max tokens", 64, 2048, 512, step=32)
    temperature = st.slider("Temperature", 0.1, 2.0, 0.7)
    top_k = st.slider("Top-K", 1, 100, 40)
    top_p = st.slider("Top-P", 0.1, 1.0, 0.95)
    repeat_penalty = st.slider("Repetition Penalty", 1.0, 2.0, 1.1)

# Model info
selected_model = MODELS[selected_model_name]
model_path = os.path.join("models", selected_model["filename"])

# Ensure model directory exists
os.makedirs("models", exist_ok=True)

# Function to clean up old models
def cleanup_old_models():
    for f in os.listdir("models"):
        if f.endswith(".gguf") and f != selected_model["filename"]:
            try:
                os.remove(os.path.join("models", f))
            except Exception as e:
                st.warning(f"Couldn't delete old model {f}: {e}")

# Function to download the selected model
def download_model():
    with st.spinner(f"Downloading {selected_model['filename']}..."):
        hf_hub_download(
            repo_id=selected_model["repo_id"],
            filename=selected_model["filename"],
            local_dir="./models",
            local_dir_use_symlinks=False,  # deprecated and ignored by recent huggingface_hub releases
        )

# Function to validate or download the model
def validate_or_download_model():
    if not os.path.exists(model_path):
        cleanup_old_models()
        download_model()
    try:
        # Attempt to load the model with minimal resources to validate it
        probe = Llama(model_path=model_path, n_ctx=16, n_threads=1)
        del probe  # free the probe instance immediately; it was only a sanity check
        gc.collect()
    except Exception as e:
        st.warning(f"Model file was invalid or corrupt: {e}\nRedownloading...")
        try:
            os.remove(model_path)
        except OSError:
            pass
        cleanup_old_models()
        download_model()

# Validate or download the selected model
validate_or_download_model()

# Load model if changed
if "model_name" not in st.session_state or st.session_state.model_name != selected_model_name:
    if "llm" in st.session_state and st.session_state.llm