Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import os
 import threading
 import time

@@ -18,19 +17,16 @@ OLLAMA_SERVICE_THREAD.start()
 
 print("Giving ollama serve a moment")
 time.sleep(10)
-subprocess.run("~/ollama
+subprocess.run("~/ollama pull gemma2", shell=True)
 
 
 import copy
 import gradio as gr
-import
-from llama_index.llms.ollama import Ollama
-import llama_index
-from llama_index.core.llms import ChatMessage
+import ollama
 
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_ID = "google/gemma-2-
+MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-9b-it")
 MODEL_NAME = MODEL_ID.split("/")[-1]
 
 TITLE = "<h1><center>Chatbox</center></h1>"

@@ -56,33 +52,30 @@ h3 {
     text-align: center;
 }
 """
-@spaces.GPU()
 def stream_chat(message: str, history: list, temperature: float, context_window: int, top_p: float, top_k: int, penalty: float):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
     for prompt, answer in history:
         conversation.extend([
-
-            role
-            ),
-            ChatMessage(role="assistant", content=answer),
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": answer},
         ])
-
+    conversation.append({"role": "user", "content": message})
 
     print(f"Conversation is -\n{conversation}")
 
-
-
-        messages
-
-        top_p=top_p,
-        top_k=top_k,
-        repeat_penalty=penalty,
-        context_window=context_window,
+    response = ollama.chat(
+        model="gemma2",
+        messages=conversation,
+        stream=True
     )
-
-
+
+    message = ""
+    for chunk in response:
+        message += chunk["message"]["content"]
+        yield "", message
+
 
 
 chatbot = gr.Chatbot(height=600)
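One thing the new call drops is the sampling controls: stream_chat still accepts temperature, context_window, top_p, top_k, and penalty, but the ollama.chat(...) request above never forwards them. Below is a minimal sketch of how they could be passed through, assuming the ollama Python client's options mapping and Ollama's usual option names (num_ctx for the context window, repeat_penalty for the repetition penalty); it is an illustration, not the code in this commit.

import ollama

def stream_chat(message, history, temperature, context_window, top_p, top_k, penalty):
    # Rebuild the history in the plain-dict message format used in the commit.
    conversation = []
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})

    # Assumed: the client accepts an `options` dict mirroring Ollama's API options.
    response = ollama.chat(
        model="gemma2",
        messages=conversation,
        stream=True,
        options={
            "temperature": temperature,
            "num_ctx": context_window,   # context window size
            "top_p": top_p,
            "top_k": top_k,
            "repeat_penalty": penalty,
        },
    )

    # Accumulate the streamed chunks and yield ("", reply) like the commit does,
    # so the input textbox is cleared while the chatbot reply grows.
    reply = ""
    for chunk in response:
        reply += chunk["message"]["content"]
        yield "", reply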