Akjava committed
Commit 358544d · verified · 1 Parent(s): c62145d

Update app.py

Files changed (1):
  1. app.py +23 -11
app.py CHANGED
@@ -23,15 +23,11 @@ from exception import CustomExceptionHandling
 
 # Download gguf model files
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-
+os.makedirs("models",exist_ok=True)
+#mtsdurica/madlad400-3b-mt-Q8_0-GGUF
 hf_hub_download(
-    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
-    filename="google_gemma-3-1b-it-Q6_K.gguf",
-    local_dir="./models",
-)
-hf_hub_download(
-    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
-    filename="google_gemma-3-1b-it-Q5_K_M.gguf",
+    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
+    filename="madlad400-3b-mt-q8_0.gguf",
     local_dir="./models",
 )
 
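Note: the download step now targets the MADLAD-400 3B translation GGUF instead of the two Gemma quantizations, and creates the models directory first. A minimal standalone sketch of the same step, assuming the mtsdurica repo is public so no token is needed; model_path is only an illustrative name, and hf_hub_download returns the local path of the fetched file:

import os
from huggingface_hub import hf_hub_download

os.makedirs("models", exist_ok=True)

# Returns the path of the downloaded file under local_dir,
# e.g. "./models/madlad400-3b-mt-q8_0.gguf".
model_path = hf_hub_download(
    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
    filename="madlad400-3b-mt-q8_0.gguf",
    local_dir="./models",
)
print(model_path)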
 
@@ -64,6 +60,19 @@ description = """Gemma 3 is a family of lightweight, multimodal open models that
 llm = None
 llm_model = None
 
+def trans(text):
+    llama = llm
+    text = f"<2ja>{text}".encode()
+    tokens = llama.tokenize(text)
+    llama.encode(tokens)
+    tokens = [llama.decoder_start_token()]
+    buf = ""
+    for token in llama.generate(tokens, top_k=0, top_p=0.95, temp=0, repeat_penalty=1.0):
+        buf += llama.detokenize([token]).decode()
+        if token == llama.token_eos():
+            break
+    return buf
+
 def respond(
     message: str,
     history: List[Tuple[str, str]],
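The new trans helper drives MADLAD-400's encoder-decoder flow through llama-cpp-python's low-level calls: tokenize the source text with a <2xx> target-language prefix (<2ja> for Japanese here), run the encoder, then greedily decode starting from the decoder start token. A minimal standalone sketch of the same loop, assuming the GGUF has already been downloaded to ./models as above; the translate name and lang_code parameter are illustrative, and unlike the committed loop it accumulates bytes before decoding so multi-byte UTF-8 output cannot be split mid-character:

from llama_cpp import Llama

llama = Llama("./models/madlad400-3b-mt-q8_0.gguf")

def translate(text: str, lang_code: str = "ja") -> str:
    # Encoder pass over "<2xx>" + source text.
    llama.encode(llama.tokenize(f"<2{lang_code}>{text}".encode()))
    # Greedy decode from the decoder start token until EOS.
    buf = b""
    for token in llama.generate(
        [llama.decoder_start_token()], top_k=0, top_p=0.95, temp=0, repeat_penalty=1.0
    ):
        if token == llama.token_eos():
            break
        buf += llama.detokenize([token])
    return buf.decode()

print(translate("Hello, how are you?"))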
@@ -97,6 +106,7 @@ def respond(
     global llm
     global llm_model
 
+    #llama = Llama("madlad400-3b-mt-q8_0.gguf")
     # Load the model
     if llm is None or llm_model != model:
         llm = Llama(
@@ -109,6 +119,9 @@
             n_threads_batch=8,
         )
         llm_model = model
+
+    return trans(message)
+
     provider = LlamaCppPythonProvider(llm)
 
     # Create the agent
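With the return added above, respond now loads the selected GGUF and hands every message straight to trans, so the LlamaCppPythonProvider/agent path below it is effectively unreachable in this commit. A simplified view of the resulting control flow; the argument list is trimmed and the Llama constructor kwargs that the diff elides are not shown, so the model_path value here is purely illustrative:

def respond(message, history, model, *extra):
    global llm, llm_model
    # (Re)load the model only when the dropdown selection changes.
    if llm is None or llm_model != model:
        llm = Llama(model_path=f"models/{model}", n_threads_batch=8)
        llm_model = model
    # Short-circuit: answer with the MADLAD translation of the message.
    return trans(message)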
@@ -172,10 +185,9 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "google_gemma-3-1b-it-Q6_K.gguf",
-                "google_gemma-3-1b-it-Q5_K_M.gguf",
+                "madlad400-3b-mt-q8_0.gguf",
             ],
-            value="google_gemma-3-1b-it-Q5_K_M.gguf",
+            value="madlad400-3b-mt-q8_0.gguf",
             label="Model",
             info="Select the AI model to use for chat",
         ),
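Finally, the dropdown now offers only the MADLAD GGUF, and its value is forwarded to respond's model parameter by gr.ChatInterface. A trimmed sketch of that wiring, with the app's other additional_inputs and launch options omitted:

import gradio as gr

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=["madlad400-3b-mt-q8_0.gguf"],
            value="madlad400-3b-mt-q8_0.gguf",
            label="Model",
            info="Select the AI model to use for chat",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()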
 