Update app.py
app.py CHANGED
@@ -1,7 +1,4 @@
 import os
-
-#remove this if in CPU
-import spaces
 import threading
 import time
 import subprocess
@@ -21,8 +18,10 @@ OLLAMA_SERVICE_THREAD.start()
 
 print("Giving ollama serve a moment")
 time.sleep(10)
+
 # Modify the model to what you want
-model = "gemma2
+model = "gemma2"
+
 subprocess.run(f"~/ollama pull {model}", shell=True)
 
 
@@ -32,7 +31,7 @@ from ollama import Client
 client = Client(host='http://localhost:11434', timeout=120)
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-
+MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-9b-it")
 MODEL_NAME = MODEL_ID.split("/")[-1]
 
 TITLE = "<h1><center>ollama-Chat</center></h1>"
@@ -42,7 +41,7 @@ DESCRIPTION = f"""
 <center>
 <p>Feel free to test models with ollama.
 <br>
-Easy to modify and running models
+Easy to modify and running models you want.
 </p>
 </center>
 """
@@ -59,7 +58,6 @@ h3 {
 }
 """
 
-# Remove this if in CPU
 
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
 
@@ -84,8 +82,6 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
             'top_k': top_k,
             'repeat_penalty': penalty,
             'low_vram': True,
-            'main_gpu': 0,
-            'num_gpu': 1,
         },
     )
 
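For context, the second hunk's header references OLLAMA_SERVICE_THREAD.start(), and the diff pulls the model via ~/ollama. Below is a minimal sketch of what that startup sequence likely looks like; the "ollama serve" command and the daemon flag are assumptions, since the diff only shows the surrounding context lines.

import subprocess
import threading
import time

# Run "ollama serve" in the background so the client can connect later.
# The ~/ollama path comes from the pull command in the diff; the rest of
# this thread body is an assumption.
def ollama_service():
    subprocess.run("~/ollama serve", shell=True)

OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service, daemon=True)
OLLAMA_SERVICE_THREAD.start()

print("Giving ollama serve a moment")
time.sleep(10)

# Modify the model to what you want
model = "gemma2"
subprocess.run(f"~/ollama pull {model}", shell=True)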
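The last hunk drops the GPU-only options 'main_gpu' and 'num_gpu' while keeping 'low_vram'; together with removing "import spaces" in the first hunk, this is consistent with moving the Space off ZeroGPU hardware onto plain CPU. A hedged sketch of how stream_chat presumably drives the ollama client with the remaining options follows; the option names match the diff, but the history handling and the yield loop are assumptions about the unshown parts of the function.

from ollama import Client

client = Client(host='http://localhost:11434', timeout=120)

def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
    # Rebuild the conversation in the format the ollama chat API expects.
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    response = client.chat(
        model="gemma2",  # the model pulled at startup
        messages=conversation,
        stream=True,
        options={
            'num_predict': max_new_tokens,
            'temperature': temperature,
            'top_p': top_p,
            'top_k': top_k,
            'repeat_penalty': penalty,
            'low_vram': True,  # kept; 'main_gpu' and 'num_gpu' were removed in this commit
        },
    )

    # Accumulate and yield partial text, e.g. for a streaming Gradio chatbot.
    buffer = ""
    for chunk in response:
        buffer += chunk['message']['content']
        yield buffer

Without the explicit GPU hints, Ollama falls back to its default device selection, which on CPU-only hardware means pure CPU inference.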