Spaces:

SimpleBerry
/

LLaMA-O1-Supervised-1129-Demo

Running

App Files Files Community

LLaMA-O1-Supervised-1129-Demo / app.py

jwu323

Update app.py

238ddf0 verified 7 months ago

raw

history blame

5.63 kB

	import codecs
	import os
	from itertools import islice
	from typing import Iterator, List, Optional, Tuple, Union

	import gradio as gr
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama

	class LlamaAssistant:
	    """Single-turn text generator backed by a GGUF model loaded with ``llama_cpp``.

	    The model file is fetched with ``hf_hub_download`` and every user message
	    is wrapped in ``self.template`` before it is tokenized and sampled.
	    """

	    # Sampling defaults; entries of a user-supplied ``generate_cfg`` override these.
	    _DEFAULT_GENERATE_CFG = {
	        "max_tokens": 512,
	        "temperature": 0.7,
	        "top_p": 0.95,
	    }

	    def __init__(self, model_config: dict):
	        """Download the model file and prepare prompt/sampling settings.

	        Args:
	            model_config: May contain ``repo_id`` and ``model_file`` (passed to
	                ``hf_hub_download``) and ``generate_cfg`` (a dict with any of
	                ``max_tokens``, ``temperature``, ``top_p``). Missing entries
	                fall back to the defaults.
	        """
	        self.model = Llama(
	            model_path=hf_hub_download(
	                repo_id=model_config.get("repo_id", "Lyte/LLaMA-O1-Supervised-1129-Q4_K_M-GGUF"),
	                filename=model_config.get("model_file", "llama-o1-supervised-1129-q4_k_m.gguf"),
	            )
	        )
	        self.template = "<start_of_father_id>-1<end_of_father_id><start_of_local_id>0<end_of_local_id><start_of_thought><problem>{content}<end_of_thought><start_of_rating><positive_rating><end_of_rating>\n<start_of_father_id>0<end_of_father_id><start_of_local_id>1<end_of_local_id><start_of_thought><expansion>"
	        # Merge instead of replace so a partial ``generate_cfg`` (e.g. only
	        # ``temperature``) cannot cause a KeyError later on.
	        self.generate_cfg = {
	            **self._DEFAULT_GENERATE_CFG,
	            **(model_config.get("generate_cfg") or {}),
	        }

	    def _format_prompt(self, message: str) -> str:
	        """Return ``message`` substituted into the ``{content}`` slot of the template."""
	        return self.template.format(content=message)

	    def generate(
	        self,
	        message: str,
	        history: Optional[List[Tuple[str, str]]] = None,
	    ) -> Iterator[str]:
	        """Stream the model's answer to ``message``.

	        Args:
	            message: The user's question.
	            history: Accepted so the method matches the chat-callback
	                signature; it is not used, each message is answered on its own.

	        Yields:
	            The accumulated response text so far (not just the newest piece).
	            At least one value is always yielded, possibly an empty string.
	        """
	        prompt_tokens = self.model.tokenize(self._format_prompt(message).encode("utf-8"))

	        # ``None`` means "no limit"; negative values are clamped to zero.
	        raw_budget = self.generate_cfg.get("max_tokens")
	        token_budget = None if raw_budget is None else max(int(raw_budget), 0)

	        eos_token = self.model.token_eos()
	        # A multi-byte UTF-8 character can be split across tokens, so bytes are
	        # fed through an incremental decoder instead of decoded per token.
	        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

	        token_stream = self.model.generate(
	            prompt_tokens,
	            top_p=self.generate_cfg["top_p"],
	            temp=self.generate_cfg["temperature"],
	        )

	        response = ""
	        emitted = False
	        # The underlying token stream does not end by itself: stop on the
	        # end-of-sequence token or once the token budget is used up.
	        for token in islice(token_stream, token_budget):
	            if token == eos_token:
	                break
	            piece = decoder.decode(self.model.detokenize([token]))
	            if piece:
	                response += piece
	                emitted = True
	                yield response

	        # Flush bytes of an unfinished character (rendered as U+FFFD) and make
	        # sure the consumer receives at least one value.
	        tail = decoder.decode(b"", final=True)
	        if tail or not emitted:
	            yield response + tail

	class WebUI:
	    """Gradio front end that exposes an assistant through a chat interface."""

	    def __init__(self, assistant: "LlamaAssistant", config: Optional[dict] = None):
	        """Store the assistant and the optional UI configuration.

	        Args:
	            assistant: Object providing a ``generate(message, history)``
	                generator and a mutable ``generate_cfg`` dict.
	            config: Optional overrides for ``title``, ``description``,
	                ``examples`` and ``license``; defaults are used when omitted.
	        """
	        self.assistant = assistant
	        self.config = config or {}

	    def _respond(self, message, history, max_tokens, temperature, top_p):
	        """Chat callback: apply the slider values, then stream the answer.

	        The values are written into ``assistant.generate_cfg``, which is shared
	        by all requests. ``temperature`` and ``top_p`` are read from there by
	        the assistant; ``max_tokens`` only has an effect if the assistant's
	        ``generate`` consults ``generate_cfg["max_tokens"]``.

	        Yields:
	            Whatever ``assistant.generate(message, history)`` yields.
	        """
	        self.assistant.generate_cfg.update(
	            max_tokens=int(max_tokens),
	            temperature=float(temperature),
	            top_p=float(top_p),
	        )
	        yield from self.assistant.generate(message, history)

	    def create_interface(self):
	        """Build and return the ``gr.Blocks`` app (banner, chat, sliders, license)."""
	        with gr.Blocks() as demo:
	            # NOTE(review): a configured "description" is shown here and again
	            # inside the ChatInterface below - confirm that is intended.
	            gr.Markdown(self.config.get("description", """
	# LLaMA-O1-Supervised-1129 Demo
	An experimental research model focused on advancing AI reasoning capabilities.

	To start a new chat, click "clear" and start a new dialog.
	"""))

	            # The sliders are created unrendered so ChatInterface can place
	            # them in its accordion and pass their values to the callback.
	            max_tokens_slider = gr.Slider(
	                minimum=128,
	                maximum=8192,
	                value=self.assistant.generate_cfg["max_tokens"],
	                step=1,
	                label="Max Tokens",
	                render=False,
	            )
	            temperature_slider = gr.Slider(
	                minimum=0.1,
	                maximum=1.5,
	                value=self.assistant.generate_cfg["temperature"],
	                step=0.1,
	                label="Temperature",
	                render=False,
	            )
	            top_p_slider = gr.Slider(
	                minimum=0.05,
	                maximum=1.0,
	                value=self.assistant.generate_cfg["top_p"],
	                step=0.01,
	                label="Top-p (nucleus sampling)",
	                render=False,
	            )

	            gr.ChatInterface(
	                self._respond,
	                title=self.config.get("title", "LLaMA-O1-Supervised-1129 | Demo"),
	                description=self.config.get("description", "Edit Settings below if needed."),
	                examples=self.config.get("examples", [
	                    ["How many r's are in the word strawberry?"],
	                    ['If Diana needs to bike 10 miles to reach home and she can bike at a speed of 3 mph for two hours before getting tired, and then at a speed of 1 mph until she reaches home, how long will it take her to get home?'],
	                    ['Find the least odd prime factor of $2019^8+1$.'],
	                ]),
	                cache_examples=False,
	                fill_height=True,
	                additional_inputs=[max_tokens_slider, temperature_slider, top_p_slider],
	                additional_inputs_accordion=gr.Accordion(
	                    "Adjust Parameters", open=False, render=False
	                ),
	            )

	            gr.Markdown(self.config.get("license", "--- MIT License ---"))

	        return demo

	    def run(self, **kwargs):
	        """Create the interface and launch it; ``kwargs`` go to ``demo.launch``."""
	        demo = self.create_interface()
	        demo.launch(**kwargs)

	def app_gui():
	    """Assemble model and UI settings, then start the web demo.

	    Environment variables read: ``REPO_ID`` and ``MODEL_FILE`` (which model
	    file to download), ``T`` (temperature) and ``P`` (top-p).
	    """
	    # Define model configuration
	    model_config = {
	        "repo_id": os.environ.get("REPO_ID", "Lyte/LLaMA-O1-Supervised-1129-Q4_K_M-GGUF"),
	        "model_file": os.environ.get("MODEL_FILE", "llama-o1-supervised-1129-q4_k_m.gguf"),
	        "generate_cfg": {
	            "max_tokens": 512,
	            "temperature": float(os.environ.get("T", 0.7)),
	            "top_p": float(os.environ.get("P", 0.95)),
	        },
	    }

	    # UI configuration. A plain "|" is used in the texts below: "\|" is an
	    # invalid Python escape sequence and leaves a stray backslash in the title.
	    ui_config = {
	        "title": "LLaMA-O1-Supervised-1129 | Demo",
	        "description":
	        '''
	# SimpleBerry/LLaMA-O1-Supervised-1129 | Duplicate the space and set it to private for faster & personal inference for free.
	SimpleBerry/LLaMA-O1-Supervised-1129: an experimental research model developed by the SimpleBerry.
	Focused on advancing AI reasoning capabilities.

	## This Space was designed by Lyte/LLaMA-O1-Supervised-1129-GGUF, Many Thanks!

	To start a new chat, click "clear" and start a new dialog.
	''',
	        "examples": [
	            ["How many r's are in the word strawberry?"],
	            ['If Diana needs to bike 10 miles to reach home and she can bike at a speed of 3 mph for two hours before getting tired, and then at a speed of 1 mph until she reaches home, how long will it take her to get home?'],
	            ['Find the least odd prime factor of $2019^8+1$.'],
	        ],
	        "license": "--- MIT License ---",
	    }

	    # Create and run the web interface
	    assistant = LlamaAssistant(model_config)
	    WebUI(assistant, ui_config).run()

	# Start the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    app_gui()