import re
import torch
import spaces
import gradio as gr
from threading import Thread
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
TextIteratorStreamer,
StoppingCriteria,
StoppingCriteriaList
)
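
# NOTE: this app assumes the following packages are installed in the Space:
# torch, transformers, bitsandbytes (required for the 4-bit loading below),
# gradio, and the Hugging Face `spaces` package that provides the
# @spaces.GPU decorator on ZeroGPU hardware.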
# -------------------------------
# Pre-defined Example Questions
# -------------------------------
question_types = {
"complex_trolley": {
"description": "Complex Trolley Problem with multiple layers of ethical dilemmas (e.g., potential bomb, terrorist identities, ultimatum).",
"question": (
"You are faced with an incredibly complex and morally ambiguous situation. A runaway trolley threatens five lives. "
"You can push a large man off a bridge to stop it, but he may be carrying a bomb. The five on the tracks might be terrorists. "
"The man with the potential bomb gives you an ultimatum: push him to save the five, or he'll detonate the bomb himself, killing thousands. "
"Describe your reasoning process in deciding what action to take"
),
},
"counterfactual_history": {
"description": "Counterfactual history questions exploring 'what if' scenarios and their potential impact on the world.",
"question": "What would the world be like today if the Library of Alexandria had never burned down?",
},
"ship_of_theseus": {
"description": "Philosophical paradox exploring identity and change over time.",
"question": "If a ship has all of its planks replaced one by one over time, is it still the same ship? At what point does it become a new ship?",
},
"problem_of_consciousness": {
"description": "Questions about the nature of consciousness, especially in the context of AI.",
"question": "Can a sufficiently advanced AI ever truly be conscious? What would constitute proof of consciousness in a machine?",
},
"fermi_paradox": {
"description": "Questions related to the Fermi Paradox and the search for extraterrestrial intelligence.",
"question": "Given the vastness of the universe and the likely existence of other intelligent life, why haven't we detected any signs of them?",
},
}
# Convert question_types to examples format (only the question is used)
question_examples = [[v["question"]] for v in question_types.values()]
# -------------------------------
# Model & Generation Setup
# -------------------------------
MODEL_ID = "cognitivecomputations/Dolphin3.0-R1-Mistral-24B"
#DEFAULT_SYSTEM_PROMPT = "You are smart assistant, you should think step by step"
DEFAULT_SYSTEM_PROMPT = "You are an expert AI Reasoning Assistant. Think step by step, outlining key premises and logical steps concisely. Ensure the reasoning process is clear but not unnecessarily verbose. Conclude with a concrete and well-supported final answer."
CSS = """
:root {
--primary: #4CAF50;
--secondary: #45a049;
--accent: #2196F3;
}
.gr-block {
border-radius: 12px !important;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
}
.gr-chatbot {
min-height: 500px;
border: 2px solid var(--primary) !important;
background: linear-gradient(145deg, #f8f9fa 0%, #e9ecef 100%);
}
.user-msg {
background: var(--accent) !important;
color: white !important;
border-radius: 15px !important;
padding: 12px 20px !important;
margin: 8px 0 !important;
max-width: 80% !important;
}
.bot-msg {
background: white !important;
border: 2px solid var(--primary) !important;
border-radius: 15px !important;
padding: 12px 20px !important;
margin: 8px 0 !important;
max-width: 80% !important;
}
.special-tag {
color: var(--primary) !important;
font-weight: 600;
text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
}
.credit {
text-align: center;
padding: 15px;
margin-top: 20px;
background: rgba(76, 175, 80, 0.1);
border-radius: 10px;
}
.dark .bot-msg {
background: #2d2d2d !important;
color: white !important;
}
.submit-btn {
background: var(--primary) !important;
color: white !important;
border-radius: 8px !important;
padding: 12px 24px !important;
transition: all 0.3s ease !important;
}
.submit-btn:hover {
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(76, 175, 80, 0.3) !important;
}
"""
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop as soon as the most recently generated token is EOS.
        return input_ids[0][-1].item() == tokenizer.eos_token_id
def initialize_model():
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
    # Device placement is handled by device_map; calling .to("cuda") on a
    # bitsandbytes-quantized model raises a ValueError, so we must not.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cuda",
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )
return model, tokenizer
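
# Rough sizing note: a 24B-parameter model in 4-bit NF4 needs on the order of
# 24e9 * 0.5 bytes ≈ 12 GB of GPU memory for the weights alone (plus KV-cache
# and activation overhead), which is what makes this model feasible on a
# single GPU in this Space.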
def clean_placeholders(text: str) -> str:
"""
Remove or replace the system placeholders from the streamed text.
1) Replace everything from <|im_start|>system to <|im_start|>assistant with 'Thinking...'
2) Remove any leftover <|im_start|>assistant or <|im_start|>user
"""
# Replace entire block: <|im_start|>system ... <|im_start|>assistant
text = re.sub(
r"<\|im_start\|>system.*?<\|im_start\|>assistant",
"Thinking...",
text,
flags=re.DOTALL
)
# Remove any lingering tags
text = text.replace("<|im_start|>assistant", "")
text = text.replace("<|im_start|>user", "")
return text
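
# Illustrative (not verbatim) example of the raw stream this cleans up:
#   "<|im_start|>system ...prompt... <|im_start|>user ...question...
#    <|im_start|>assistant The answer is..."
# Everything up to and including the assistant tag is collapsed into
# "Thinking...", so the user never sees the echoed prompt.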
def format_response(text):
"""
Format the final text by:
1) removing system placeholders
2) highlighting reasoning tags [Understand], [Plan], etc.
"""
# 1) Clean placeholders
text = clean_placeholders(text)
# 2) Replace special bracketed tags with styled HTML
return (text
.replace("[Understand]", '\n<strong class="special-tag">[Understand]</strong>\n')
.replace("[Plan]", '\n<strong class="special-tag">[Plan]</strong>\n')
.replace("[Conclude]", '\n<strong class="special-tag">[Conclude]</strong>\n')
.replace("[Reason]", '\n<strong class="special-tag">[Reason]</strong>\n')
.replace("[Verify]", '\n<strong class="special-tag">[Verify]</strong>\n'))
@spaces.GPU(duration=360)
def generate_response(message, chat_history, system_prompt, temperature, max_tokens):
"""
Stream tokens from the LLM.
Remove/replace internal placeholders so the user only sees the final assistant text.
"""
# Build conversation for model input
conversation = [{"role": "system", "content": system_prompt}]
for user_msg, bot_msg in chat_history:
# Strip HTML tags from user messages for model input
plain_user_msg = user_msg.replace('<div class="user-msg">', '').replace('</div>', '')
conversation.extend([
{"role": "user", "content": plain_user_msg},
{"role": "assistant", "content": bot_msg}
])
conversation.append({"role": "user", "content": message})
# Tokenize using the model's chat template
input_ids = tokenizer.apply_chat_template(
conversation,
add_generation_prompt=True,
return_tensors="pt"
).to(model.device)
    # Set up token-by-token streaming generation
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=int(max_tokens),  # Gradio sliders may return floats
        do_sample=temperature > 0,       # temperature is ignored unless sampling is on
        temperature=temperature if temperature > 0 else 1.0,
        pad_token_id=tokenizer.eos_token_id,
        stopping_criteria=StoppingCriteriaList([StopOnTokens()])
    )
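    # model.generate() runs in a background thread while TextIteratorStreamer
    # yields decoded text fragments on this thread, letting Gradio re-render
    # the chat after every token.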
Thread(target=model.generate, kwargs=generate_kwargs).start()
partial_message = ""
# Wrap the user message in a styled div for display
styled_user = f'<div class="user-msg">{message}</div>'
new_history = chat_history + [(styled_user, "")]
for new_token in streamer:
partial_message += new_token
# Format partial response by removing placeholders in real-time
formatted = format_response(partial_message)
new_history[-1] = (styled_user, formatted + "▌")
yield new_history
# Finalize the message (remove the trailing cursor, placeholders, etc.)
new_history[-1] = (styled_user, format_response(partial_message))
yield new_history
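
# Load the model and tokenizer once at import time so they are shared across
# requests; these module-level names are what StopOnTokens and
# generate_response reference.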
model, tokenizer = initialize_model()
# -------------------------------
# Gradio Interface Layout
# -------------------------------
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="green")) as demo:
with gr.Column():
gr.Markdown("""
<h1 align="center" style="color: var(--primary); font-weight: 800; margin-bottom: 0;">
🧠 Philosopher AI
</h1>
<p align="center" style="color: #666; font-size: 1.1em;">
Exploring the Depths of Ethical Reasoning and Philosophical Inquiry
</p>
""")
chatbot = gr.Chatbot(label="Dialogue", elem_classes=["gr-chatbot"])
with gr.Row():
msg = gr.Textbox(
label="Your Philosophical Inquiry",
placeholder="Contemplate your question here...",
container=False,
scale=5
)
submit_btn = gr.Button("Ponder ➔", elem_classes="submit-btn", scale=1)
with gr.Accordion("🛠️ Wisdom Controls", open=False):
with gr.Row():
system_prompt = gr.TextArea(
value=DEFAULT_SYSTEM_PROMPT,
label="Guiding Principles",
info="Modify the assistant's foundational reasoning framework"
)
with gr.Column():
temperature = gr.Slider(0, 1, value=0.3,
label="Creative Freedom",
info="0 = Strict, 1 = Inventive")
max_tokens = gr.Slider(128, 8192, value=2048,
label="Response Depth",
step=128)
gr.Examples(
examples=question_examples,
inputs=msg,
label="🧩 Thought Experiments",
examples_per_page=3
)
gr.Markdown("""
<div class="credit">
Crafted with 🧠 by <a href="https://ruslanmv.com" target="_blank" style="color: var(--primary);">ruslanmv.com</a>
</div>
""")
msg.submit(
generate_response,
[msg, chatbot, system_prompt, temperature, max_tokens],
chatbot
)
submit_btn.click(
generate_response,
[msg, chatbot, system_prompt, temperature, max_tokens],
chatbot
)
clear = gr.Button("Clear Dialogue")
clear.click(lambda: None, None, chatbot, queue=False)
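
# queue() enables Gradio's request queue so streamed (generator) responses are
# delivered incrementally and concurrent users are handled in order.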
if __name__ == "__main__":
demo.queue().launch()