Luigi committed
Commit 37ee1f3 · 1 Parent(s): 3190ad6

improve storage management

Files changed (1): app.py (+29, -16)
app.py CHANGED
@@ -3,6 +3,7 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import os
 import gc
+import shutil

 # Available models
 MODELS = {
@@ -16,10 +17,10 @@ MODELS = {
         "filename": "gemma-3-4b-it-Q5_K_M.gguf",
         "description": "Gemma 3 4B IT (Q5_K_M)"
     },
-    "Phi-4-mini-Instruct (Q5_K_M)": {
+    "Phi-4-mini-Instruct (Q4_K_M)": {
         "repo_id": "unsloth/Phi-4-mini-instruct-GGUF",
-        "filename": "Phi-4-mini-instruct-Q5_K_M.gguf",
-        "description": "Phi-4 Mini Instruct (Q5_K_M)"
+        "filename": "Phi-4-mini-instruct-Q4_K_M.gguf",
+        "description": "Phi-4 Mini Instruct (Q4_K_M)"
     },
 }

@@ -37,30 +38,42 @@ with st.sidebar:
     selected_model = MODELS[selected_model_name]
     model_path = os.path.join("models", selected_model["filename"])

-    # Initialize model cache state
+    # Make sure models dir exists
+    os.makedirs("models", exist_ok=True)
+
+    # Clear old models if new one isn't present
+    if not os.path.exists(model_path):
+        for file in os.listdir("models"):
+            if file.endswith(".gguf"):
+                try:
+                    os.remove(os.path.join("models", file))
+                except Exception as e:
+                    st.warning(f"Failed to delete {file}: {e}")
+
+        # Download the selected model
+        with st.spinner(f"Downloading {selected_model['filename']}..."):
+            hf_hub_download(
+                repo_id=selected_model["repo_id"],
+                filename=selected_model["filename"],
+                local_dir="./models",
+                local_dir_use_symlinks=False,
+            )
+
+    # Init state
     if "model_name" not in st.session_state:
         st.session_state.model_name = None
     if "llm" not in st.session_state:
         st.session_state.llm = None

-    # Download model if needed
-    if not os.path.exists(model_path):
-        hf_hub_download(
-            repo_id=selected_model["repo_id"],
-            filename=selected_model["filename"],
-            local_dir="./models",
-            local_dir_use_symlinks=False,
-        )
-
-    # Load model only if it changed
+    # Load model if changed
    if st.session_state.model_name != selected_model_name:
        if st.session_state.llm is not None:
-            # Clean up old model to free memory
             del st.session_state.llm
             gc.collect()
+
         st.session_state.llm = Llama(
             model_path=model_path,
-            n_ctx=1024,  # Reduced for RAM safety
+            n_ctx=1024,
             n_threads=2,
             n_threads_batch=2,
             n_batch=4,
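
For reference, the eviction-then-download flow this commit introduces can be exercised on its own. The sketch below is illustrative, not the app itself: it hard-codes the Phi-4 entry from MODELS, swaps the st.spinner/st.warning calls for prints so it runs outside Streamlit, and drops local_dir_use_symlinks, which recent huggingface_hub releases deprecate.

import os

from huggingface_hub import hf_hub_download

# Hypothetical standalone driver for the cleanup-then-download pattern.
MODEL = {
    "repo_id": "unsloth/Phi-4-mini-instruct-GGUF",
    "filename": "Phi-4-mini-instruct-Q4_K_M.gguf",
}

models_dir = "models"
os.makedirs(models_dir, exist_ok=True)
model_path = os.path.join(models_dir, MODEL["filename"])

if not os.path.exists(model_path):
    # Evict every cached .gguf first, so at most one model file sits on
    # disk at a time (the point of this commit's storage management).
    for name in os.listdir(models_dir):
        if name.endswith(".gguf"):
            try:
                os.remove(os.path.join(models_dir, name))
            except OSError as e:
                print(f"Failed to delete {name}: {e}")

    # Then fetch the selected model into the same directory.
    print(f"Downloading {MODEL['filename']}...")
    hf_hub_download(
        repo_id=MODEL["repo_id"],
        filename=MODEL["filename"],
        local_dir=models_dir,
    )

Deleting before downloading keeps peak disk usage near a single model file, at the cost of re-downloading whenever the user switches back to a previously evicted model.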