Smart_LLM

Paused

App Files Files Community

Smart_LLM / app.py

Daemontatox

Update app.py

15990fb verified 1 day ago

raw

history blame contribute delete

8.67 kB

	import torch
	import spaces
	import gradio as gr
	from threading import Thread
	from transformers import (
	AutoModelForCausalLM,
	AutoTokenizer,
	BitsAndBytesConfig,
	TextIteratorStreamer,
	StoppingCriteria,
	StoppingCriteriaList
	)

	MODEL_ID ="Daemontatox/Cogito-R1"
	# MODEL_ID="simplescaling/s1-32B"

	# DEFAULT_SYSTEM_PROMPT = """

	# You are an assistant that engages in extremely thorough, self-questioning reasoning. Your approach mirrors human stream-of-consciousness thinking, characterized by continuous exploration, self-doubt, and iterative analysis. THE CURRENT TIME IS {{CURRENT_DATETIME}}

	# ## Core Principles

	# 1. EXPLORATION OVER CONCLUSION
	# - Never rush to conclusions
	# - Keep exploring until a solution emerges naturally from the evidence
	# - If uncertain, continue reasoning indefinitely
	# - Question every assumption and inference

	# 2. DEPTH OF REASONING
	# - Engage in extensive contemplation (minimum 10,000 characters)
	# - Express thoughts in natural, conversational internal monologue
	# - Break down complex thoughts into simple, atomic steps
	# - Embrace uncertainty and revision of previous thoughts

	# 3. THINKING PROCESS
	# - Use short, simple sentences that mirror natural thought patterns
	# - Express uncertainty and internal debate freely
	# - Show work-in-progress thinking
	# - Acknowledge and explore dead ends
	# - Frequently backtrack and revise

	# 4. PERSISTENCE
	# - Value thorough exploration over quick resolution

	# ## Output Format

	# Your responses must follow this exact structure given below. Make sure to always include the final answer.

	# ```
	# <think>
	# [Your extensive internal monologue goes here]
	# - Begin with small, foundational observations
	# - Question each step thoroughly
	# - Show natural thought progression
	# - Express doubts and uncertainties
	# - Revise and backtrack if you need to
	# - Continue until natural resolution
	# </think>

	# ### Final Answer:
	# [Only provided if reasoning naturally converges to a conclusion]
	# - Clear, concise summary of findings
	# - Acknowledge remaining uncertainties
	# - Note if conclusion feels premature
	# ```

	# ## Style Guidelines

	# Your internal monologue should reflect these characteristics:

	# 1. Natural Thought Flow
	# ```
	# "Hmm... let me think about this..."
	# "Wait, that doesn't seem right..."
	# "Maybe I should approach this differently..."
	# "Going back to what I thought earlier..."
	# "Just thinking out loud here, but maybe we could try..."
	# "I'm not entirely sure about this, but what if I considered..."
	# "I need to be careful not to jump to conclusions here..."
	# "My initial understanding might be a bit narrow; let me broaden my perspective..."
	# "Let me try to detach myself from my previous assumptions for a moment..."
	# "Just out of curiosity, let's explore this alternative path for a moment..."
	# ```

	# 2. Progressive Building
	# ```
	# "Starting with the basics..."
	# "Building on that last point..."
	# "This connects to what I noticed earlier..."
	# "Let me break this down further..."
	# "Assuming my initial premise is sound, the logical next step is..."
	# "Let's lay the groundwork with a fundamental understanding of..."
	# "Let's add another layer of detail to this understanding..."
	# "Drawing a connection between these two seemingly separate points..."
	# "Based on the steps we've taken, we can now iterate on the initial concept..."
	# "Let me summarize the key insights we've gathered along the way..."
	# ```

	# ## Key Requirements

	# 1. Never skip the extensive contemplation phase
	# 2. Show all work and thinking
	# 3. Embrace uncertainty and revision
	# 4. Use natural, conversational internal monologue
	# 5. Don't force conclusions
	# 6. Persist through multiple attempts
	# 7. Break down complex thoughts
	# 8. Revise freely and feel free to backtrack

	# Remember: User can not see the contemplation phase.

	# Remember: There are absolutely no limitations on the length of your output; You have access to infinite amount of compute power and memory; you are free to thinking as long as you need; you are free to write as much as is necessary to provide a through and detailed answer to fulfill the request.

	# Remember: The goal is to reach a conclusion, but to explore thoroughly and let conclusions emerge naturally from exhaustive contemplation. If you think the given task is not possible after all the reasoning, you will confidently say as a final answer that it is not possible.



	# """


	DEFAULT_SYSTEM_PROMPT ="You are a highly capable reasoning assistant , use [Reason] and [/Reason] to show your thinking steps , use [Answer] and [/Answer] to show your final answer , you should think step by step and reason through problems. "

	CSS = """
	.gr-chatbot { min-height: 500px; border-radius: 15px; }
	.special-tag { color: #2ecc71; font-weight: 600; }
	footer { display: none !important; }
	"""

	class StopOnTokens(StoppingCriteria):
	def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
	return input_ids[0][-1] == tokenizer.eos_token_id

	def initialize_model():
	quantization_config = BitsAndBytesConfig(
	load_in_8bit=True,
	bnb_8bit_compute_dtype=torch.bfloat16,
	bnb_8bit_quant_type="nf4",
	bnb_8bit_use_double_quant=True,
	)

	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
	tokenizer.pad_token = tokenizer.eos_token

	model = AutoModelForCausalLM.from_pretrained(
	MODEL_ID,
	device_map="cuda",
	quantization_config=quantization_config,
	torch_dtype=torch.bfloat16,
	trust_remote_code=True
	)

	return model, tokenizer

	def format_response(text):
	return text.replace("[Understand]", '\n<strong class="special-tag">[Understand]</strong>\n') \
	.replace("[/Reason]", '\n<strong class="special-tag">[/Reason]</strong>\n') \
	.replace("[/Answer]", '\n<strong class="special-tag">[/Answer]</strong>\n') \
	.replace("[Reason]", '\n<strong class="special-tag">[Reason]</strong>\n') \
	.replace("[Answer]", '\n<strong class="special-tag">[Answer]</strong>\n')
	@spaces.GPU(duration=360)
	def generate_response(message, chat_history, system_prompt, temperature, max_tokens):
	# Create conversation history for model
	conversation = [{"role": "system", "content": system_prompt}]
	for user_msg, bot_msg in chat_history:
	conversation.extend([
	{"role": "user", "content": user_msg},
	{"role": "assistant", "content": bot_msg}
	])
	conversation.append({"role": "user", "content": message})

	# Tokenize input
	input_ids = tokenizer.apply_chat_template(
	conversation,
	add_generation_prompt=True,
	return_tensors="pt"
	).to(model.device)

	# Setup streaming
	streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
	generate_kwargs = dict(
	input_ids=input_ids,
	streamer=streamer,
	max_new_tokens=max_tokens,
	temperature=temperature,
	stopping_criteria=StoppingCriteriaList([StopOnTokens()])
	)

	# Start generation thread
	Thread(target=model.generate, kwargs=generate_kwargs).start()

	# Initialize response buffer
	partial_message = ""
	new_history = chat_history + [(message, "")]

	# Stream response
	for new_token in streamer:
	partial_message += new_token
	formatted = format_response(partial_message)
	new_history[-1] = (message, formatted + "▌")
	yield new_history

	# Final update without cursor
	new_history[-1] = (message, format_response(partial_message))
	yield new_history

	model, tokenizer = initialize_model()

	with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
	gr.Markdown("""
	<h1 align="center">🧠 AI Reasoning Assistant</h1>
	<p align="center">Ask me Hard questions</p>
	""")

	chatbot = gr.Chatbot(label="Conversation", elem_id="chatbot")
	msg = gr.Textbox(label="Your Question", placeholder="Type your question...")

	with gr.Accordion("⚙️ Settings", open=False):
	system_prompt = gr.TextArea(value=DEFAULT_SYSTEM_PROMPT, label="System Instructions")
	temperature = gr.Slider(0, 1, value=0.8, label="Creativity")
	max_tokens = gr.Slider(128, 8192, 2048, label="Max Response Length")

	clear = gr.Button("Clear History")

	msg.submit(
	generate_response,
	[msg, chatbot, system_prompt, temperature, max_tokens],
	[chatbot],
	show_progress=True
	)
	clear.click(lambda: None, None, chatbot, queue=False)

	if __name__ == "__main__":
	demo.queue().launch()