Update app.py
app.py
CHANGED
@@ -23,15 +23,11 @@ from exception import CustomExceptionHandling
 
 # Download gguf model files
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-
+os.makedirs("models",exist_ok=True)
+#mtsdurica/madlad400-3b-mt-Q8_0-GGUF
 hf_hub_download(
-    repo_id="
-    filename="
-    local_dir="./models",
-)
-hf_hub_download(
-    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
-    filename="google_gemma-3-1b-it-Q5_K_M.gguf",
+    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
+    filename="madlad400-3b-mt-q8_0.gguf",
     local_dir="./models",
 )
 
@@ -64,6 +60,19 @@ description = """Gemma 3 is a family of lightweight, multimodal open models that
 llm = None
 llm_model = None
 
+def trans(text):
+    llama = llm
+    text = f"<2ja>{text}".encode()
+    tokens = llama.tokenize(text)
+    llama.encode(tokens)
+    tokens = [llama.decoder_start_token()]
+    buf = ""
+    for token in llama.generate(tokens, top_k=0, top_p=0.95, temp=0, repeat_penalty=1.0):
+        buf += llama.detokenize([token]).decode()
+        if token == llama.token_eos():
+            break
+    return buf
+
 def respond(
     message: str,
     history: List[Tuple[str, str]],
@@ -97,6 +106,7 @@ def respond(
     global llm
     global llm_model
 
+    #llama = Llama("madlad400-3b-mt-q8_0.gguf")
     # Load the model
     if llm is None or llm_model != model:
         llm = Llama(
@@ -109,6 +119,9 @@ def respond(
             n_threads_batch=8,
         )
         llm_model = model
+
+    return trans(message)
+
     provider = LlamaCppPythonProvider(llm)
 
     # Create the agent
@@ -172,10 +185,9 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "
-                "google_gemma-3-1b-it-Q5_K_M.gguf",
+                "madlad400-3b-mt-q8_0.gguf",
             ],
-            value="
+            value="madlad400-3b-mt-q8_0.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),
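For context, the trans() helper added in this commit drives MADLAD-400 (a T5-style encoder-decoder model) through llama-cpp-python: the source text gets a "<2xx>" target-language prefix, is tokenized and passed through the encoder, and the decoder is then sampled greedily starting from decoder_start_token() until EOS. The sketch below replays that flow outside the Gradio app. It is a minimal sketch, not part of the commit: the model path, the n_ctx value, and the translate() wrapper are assumptions for illustration, and it reuses the same encode()/decoder_start_token() calls as trans(), which presuppose a llama-cpp-python build with encoder-decoder GGUF support.

# Standalone sketch of the commit's translation flow (assumed path and settings).
from llama_cpp import Llama

# Assumption: the GGUF that app.py downloads into ./models; n_ctx is illustrative.
llama = Llama("./models/madlad400-3b-mt-q8_0.gguf", n_ctx=512)

def translate(text: str, target: str = "ja") -> str:
    # MADLAD-400 selects the target language via a "<2xx>" prefix on the source text.
    tokens = llama.tokenize(f"<2{target}>{text}".encode())
    llama.encode(tokens)                    # run the encoder over the source tokens
    tokens = [llama.decoder_start_token()]  # seed the decoder
    out = ""
    # Greedy decoding (temp=0), mirroring trans() in this commit.
    for token in llama.generate(tokens, top_k=0, top_p=0.95, temp=0, repeat_penalty=1.0):
        if token == llama.token_eos():
            break
        out += llama.detokenize([token]).decode()
    return out

print(translate("Hello, world!"))

One design note: respond() now appears to return trans(message) as soon as the model is loaded, so the LlamaCppPythonProvider and agent setup below that return is effectively bypassed, and the dropdown only controls which GGUF file gets loaded into llm.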