Akjava committed on
Commit
aa9930d
·
verified ·
1 Parent(s): d516a21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -66,7 +66,7 @@ import multiprocessing
66
 
67
  import llama_cpp
68
 
69
- def test():
70
 
71
 
72
  llama_cpp.llama_backend_init(numa=False)
@@ -283,6 +283,15 @@ def respond(
283
  top_k: int,
284
  repeat_penalty: float,
285
  ):
 
 
 
 
 
 
 
 
 
286
  """
287
  Respond to a message using the Gemma3 model via Llama.cpp.
288
 
@@ -320,7 +329,7 @@ def respond(
320
  llm_model = model
321
 
322
  trans(message)
323
- yield "done"
324
 
325
  provider = LlamaCppPythonProvider(llm)
326
 
@@ -367,7 +376,7 @@ def respond(
367
  outputs = ""
368
  for output in stream:
369
  outputs += output
370
- yield outputs
371
 
372
  # Handle exceptions that may occur during the process
373
  except Exception as e:
 
66
 
67
  import llama_cpp
68
 
69
+ def low_level():
70
 
71
 
72
  llama_cpp.llama_backend_init(numa=False)
 
283
  top_k: int,
284
  repeat_penalty: float,
285
  ):
286
+ llama = Llama("models/madlad400-3b-mt-q8_0.gguf")
287
+ tokens = llama.tokenize(fb"{message}")
288
+ llama.encode(tokens)
289
+ tokens = [llama.decoder_start_token()]
290
+ for token in llama.generate(tokens, top_k=40, top_p=0.95, temp=1, repeat_penalty=1.0):
291
+ yield (llama.detokenize([token]))
292
+ if token == llama.token_eos():
293
+ break
294
+
295
  """
296
  Respond to a message using the Gemma3 model via Llama.cpp.
297
 
 
329
  llm_model = model
330
 
331
  trans(message)
332
+ #yield "done"
333
 
334
  provider = LlamaCppPythonProvider(llm)
335
 
 
376
  outputs = ""
377
  for output in stream:
378
  outputs += output
379
+ #yield outputs
380
 
381
  # Handle exceptions that may occur during the process
382
  except Exception as e: