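"""Gradio chat demo for the LLaMA-O1-Supervised-1129 GGUF model.

Downloads the quantized model from the Hugging Face Hub, streams
completions through llama-cpp-python, and serves a simple web UI.
"""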
from typing import Iterator, List, Optional, Tuple
import os

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download


class LlamaAssistant:
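    """Wraps a llama.cpp model downloaded from the Hugging Face Hub and
    streams completions built from the model's prompt template."""
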
    def __init__(self, model_config: dict):
        self.model = Llama(
            model_path=hf_hub_download(
                repo_id=model_config.get("repo_id", "Lyte/LLaMA-O1-Supervised-1129-Q4_K_M-GGUF"),
                filename=model_config.get("model_file", "llama-o1-supervised-1129-q4_k_m.gguf"),
            )
        )
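        # Prompt template built from the model's special reasoning tokens;
        # {content} is replaced with the user's message in _format_prompt.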
        self.template = "<start_of_father_id>-1<end_of_father_id><start_of_local_id>0<end_of_local_id><start_of_thought><problem>{content}<end_of_thought><start_of_rating><positive_rating><end_of_rating>\n<start_of_father_id>0<end_of_father_id><start_of_local_id>1<end_of_local_id><start_of_thought><expansion>"
        self.generate_cfg = model_config.get("generate_cfg", {
            "max_tokens": 512,
            "temperature": 0.7,
            "top_p": 0.95,
        })
    def _format_prompt(self, message: str) -> str:
        """Substitute the user message into the model's prompt template."""
        return self.template.format(content=message)
    def generate(self, message: str, history: Optional[List[Tuple[str, str]]] = None) -> Iterator[str]:
        """Stream a response for `message`, yielding the text accumulated so far."""
        input_text = self._format_prompt(message)
        inputs = self.model.tokenize(input_text.encode('utf-8'))
        response = ""
        buffer = b""  # holds partial multi-byte UTF-8 sequences between tokens
        for i, token in enumerate(self.model.generate(
            inputs,
            top_p=self.generate_cfg["top_p"],
            temp=self.generate_cfg["temperature"]
        )):
            # Stop at end-of-sequence or once the configured token budget is spent
            # (the original loop had no stopping condition).
            if token == self.model.token_eos() or i >= self.generate_cfg["max_tokens"]:
                break
            buffer += self.model.detokenize([token])
            try:
                response += buffer.decode('utf-8')
                buffer = b""
            except UnicodeDecodeError:
                continue  # wait for the remaining bytes of a split character
            yield response
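
# Example (hypothetical standalone use, outside the Gradio UI):
#     assistant = LlamaAssistant({})  # falls back to the default repo/model/config
#     final = ""
#     for partial in assistant.generate("What is 2+2?"):
#         final = partial  # each yield is the full response so far
#     print(final)
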
class WebUI:
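    """Builds and launches the Gradio web interface around a LlamaAssistant."""
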
    def __init__(self, assistant: LlamaAssistant, config: dict = None):
        self.assistant = assistant
        self.config = config or {}

    def create_interface(self):
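        """Assemble the Blocks layout: header, chat interface, and parameter sliders."""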
        with gr.Blocks() as demo:
            # Markdown lines stay flush-left so they are not rendered as a code block.
            gr.Markdown(self.config.get("description", """
# LLaMA-O1-Supervised-1129 Demo
An experimental research model focused on advancing AI reasoning capabilities.

**To start a new chat**, click "clear" and start a new dialog.
"""))
            chatbot = gr.ChatInterface(
                self.assistant.generate,
                title=self.config.get("title", "LLaMA-O1-Supervised-1129 | Demo"),
                # The long description is already rendered above, so keep this short
                # instead of repeating the configured description.
                description="Edit Settings below if needed.",
                examples=self.config.get("examples", [
                    ["How many r's are in the word strawberry?"],
                    ["If Diana needs to bike 10 miles to reach home and she can bike at a speed of 3 mph for two hours before getting tired, and then at a speed of 1 mph until she reaches home, how long will it take her to get home?"],
                    ["Find the least odd prime factor of $2019^8+1$."],
                ]),
                cache_examples=False,
                fill_height=True
            )
            with gr.Accordion("Adjust Parameters", open=False):
                max_tokens = gr.Slider(
                    minimum=128,
                    maximum=8192,
                    value=self.assistant.generate_cfg["max_tokens"],
                    step=1,
                    label="Max Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.5,
                    value=self.assistant.generate_cfg["temperature"],
                    step=0.1,
                    label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.05,
                    maximum=1.0,
                    value=self.assistant.generate_cfg["top_p"],
                    step=0.01,
                    label="Top-p (nucleus sampling)"
                )
                # Wire the sliders to the generation config so changes take effect
                # on the next message (the originals were display-only).
                max_tokens.change(lambda v: self.assistant.generate_cfg.update(max_tokens=int(v)), max_tokens, None)
                temperature.change(lambda v: self.assistant.generate_cfg.update(temperature=v), temperature, None)
                top_p.change(lambda v: self.assistant.generate_cfg.update(top_p=v), top_p, None)
            gr.Markdown(self.config.get("license", "--- MIT License ---"))
        return demo

    def run(self, **kwargs):
        demo = self.create_interface()
        demo.launch(**kwargs)
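
# Example (hypothetical direct use; `run` forwards kwargs to gr.Blocks.launch):
#     WebUI(LlamaAssistant({}), {"title": "Demo"}).run(server_name="0.0.0.0")
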
def app_gui():
    # Define model configuration (defaults overridable via environment variables)
    model_config = {
        "repo_id": os.environ.get("REPO_ID", "Lyte/LLaMA-O1-Supervised-1129-Q4_K_M-GGUF"),
        "model_file": os.environ.get("MODEL_FILE", "llama-o1-supervised-1129-q4_k_m.gguf"),
        "generate_cfg": {
            "max_tokens": 512,
            "temperature": float(os.environ.get("T", 0.7)),
            "top_p": float(os.environ.get("P", 0.95)),
        }
    }
    # UI configuration
    ui_config = {
        "title": "LLaMA-O1-Supervised-1129 | Demo",
        "description": '''
# SimpleBerry/LLaMA-O1-Supervised-1129 | Duplicate this Space and set it to private for faster, personal inference for free.
SimpleBerry/LLaMA-O1-Supervised-1129 is an experimental research model developed by SimpleBerry, focused on advancing AI reasoning capabilities.

## This Space was designed by Lyte/LLaMA-O1-Supervised-1129-GGUF, many thanks!

**To start a new chat**, click "clear" and start a new dialog.
''',
        "examples": [
            ["How many r's are in the word strawberry?"],
            ["If Diana needs to bike 10 miles to reach home and she can bike at a speed of 3 mph for two hours before getting tired, and then at a speed of 1 mph until she reaches home, how long will it take her to get home?"],
            ["Find the least odd prime factor of $2019^8+1$."],
        ],
        "license": "--- MIT License ---"
    }
    # Create and run the web interface
    assistant = LlamaAssistant(model_config)
    WebUI(assistant, ui_config).run()


if __name__ == '__main__':
    app_gui()