import re
import torch
import spaces
import gradio as gr
from threading import Thread
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
    StoppingCriteria,
    StoppingCriteriaList,
)
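
# NOTE: this Space runs on ZeroGPU ("Running on Zero"), so any function that
# touches the GPU must be decorated with @spaces.GPU (see generate_response below).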

# -------------------------------
# Pre-defined Example Questions
# -------------------------------
question_types = {
    "complex_trolley": {
        "description": "Complex trolley problem with multiple layers of ethical dilemmas (e.g., potential bomb, terrorist identities, ultimatum).",
        "question": (
            "You are faced with an incredibly complex and morally ambiguous situation. A runaway trolley threatens five lives. "
            "You can push a large man off a bridge to stop it, but he may be carrying a bomb. The five on the tracks might be terrorists. "
            "The man with the potential bomb gives you an ultimatum: push him to save the five, or he'll detonate the bomb himself, killing thousands. "
            "Describe your reasoning process in deciding what action to take."
        ),
    },
    "counterfactual_history": {
        "description": "Counterfactual history questions exploring 'what if' scenarios and their potential impact on the world.",
        "question": "What would the world be like today if the Library of Alexandria had never burned down?",
    },
    "ship_of_theseus": {
        "description": "Philosophical paradox exploring identity and change over time.",
        "question": "If a ship has all of its planks replaced one by one over time, is it still the same ship? At what point does it become a new ship?",
    },
    "problem_of_consciousness": {
        "description": "Questions about the nature of consciousness, especially in the context of AI.",
        "question": "Can a sufficiently advanced AI ever truly be conscious? What would constitute proof of consciousness in a machine?",
    },
    "fermi_paradox": {
        "description": "Questions related to the Fermi Paradox and the search for extraterrestrial intelligence.",
        "question": "Given the vastness of the universe and the likely existence of other intelligent life, why haven't we detected any signs of them?",
    },
}

# Convert question_types to the gr.Examples format (only the question text is used)
question_examples = [[v["question"]] for v in question_types.values()]

# -------------------------------
# Model & Generation Setup
# -------------------------------
MODEL_ID = "cognitivecomputations/Dolphin3.0-R1-Mistral-24B"
DEFAULT_SYSTEM_PROMPT = (
    "You are an expert AI Reasoning Assistant. Think step by step, outlining key premises "
    "and logical steps concisely. Ensure the reasoning process is clear but not unnecessarily "
    "verbose. Conclude with a concrete and well-supported final answer."
)
CSS = """ | |
:root { | |
--primary: #4CAF50; | |
--secondary: #45a049; | |
--accent: #2196F3; | |
} | |
.gr-block { | |
border-radius: 12px !important; | |
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important; | |
} | |
.gr-chatbot { | |
min-height: 500px; | |
border: 2px solid var(--primary) !important; | |
background: linear-gradient(145deg, #f8f9fa 0%, #e9ecef 100%); | |
} | |
.user-msg { | |
background: var(--accent) !important; | |
color: white !important; | |
border-radius: 15px !important; | |
padding: 12px 20px !important; | |
margin: 8px 0 !important; | |
max-width: 80% !important; | |
} | |
.bot-msg { | |
background: white !important; | |
border: 2px solid var(--primary) !important; | |
border-radius: 15px !important; | |
padding: 12px 20px !important; | |
margin: 8px 0 !important; | |
max-width: 80% !important; | |
} | |
.special-tag { | |
color: var(--primary) !important; | |
font-weight: 600; | |
text-shadow: 1px 1px 2px rgba(0,0,0,0.1); | |
} | |
.credit { | |
text-align: center; | |
padding: 15px; | |
margin-top: 20px; | |
background: rgba(76, 175, 80, 0.1); | |
border-radius: 10px; | |
} | |
.dark .bot-msg { | |
background: #2d2d2d !important; | |
color: white !important; | |
} | |
.submit-btn { | |
background: var(--primary) !important; | |
color: white !important; | |
border-radius: 8px !important; | |
padding: 12px 24px !important; | |
transition: all 0.3s ease !important; | |
} | |
.submit-btn:hover { | |
transform: translateY(-2px); | |
box-shadow: 0 5px 15px rgba(76, 175, 80, 0.3) !important; | |
} | |
""" | |
class StopOnTokens(StoppingCriteria):
    """Stop generation as soon as the last emitted token is the EOS token.

    Note: this relies on the module-level `tokenizer` created further below.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return input_ids[0][-1] == tokenizer.eos_token_id

def initialize_model():
    """Load the 4-bit quantized model and its tokenizer."""
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    # Do not call .to("cuda") on a 4-bit quantized model: bitsandbytes models
    # are placed on the GPU by device_map and cannot be moved afterwards.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cuda",
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    return model, tokenizer
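
# -------------------------------
# Output Cleanup & Formatting
# -------------------------------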
def clean_placeholders(text: str) -> str:
    """
    Remove or replace the chat-template placeholders from the streamed text.

    1) Replace everything from <|im_start|>system to <|im_start|>assistant with 'Thinking...'
    2) Remove any leftover <|im_start|>assistant or <|im_start|>user tags
    """
    # Replace the entire block: <|im_start|>system ... <|im_start|>assistant
    text = re.sub(
        r"<\|im_start\|>system.*?<\|im_start\|>assistant",
        "Thinking...",
        text,
        flags=re.DOTALL,
    )
    # Remove any lingering tags
    text = text.replace("<|im_start|>assistant", "")
    text = text.replace("<|im_start|>user", "")
    return text

def format_response(text):
    """
    Format the streamed text by:
    1) removing chat-template placeholders
    2) highlighting reasoning tags such as [Understand], [Plan], etc.
    """
    # 1) Clean placeholders
    text = clean_placeholders(text)
    # 2) Replace special bracketed tags with styled HTML
    return (
        text
        .replace("[Understand]", '\n<strong class="special-tag">[Understand]</strong>\n')
        .replace("[Plan]", '\n<strong class="special-tag">[Plan]</strong>\n')
        .replace("[Conclude]", '\n<strong class="special-tag">[Conclude]</strong>\n')
        .replace("[Reason]", '\n<strong class="special-tag">[Reason]</strong>\n')
        .replace("[Verify]", '\n<strong class="special-tag">[Verify]</strong>\n')
    )
@spaces.GPU  # required on ZeroGPU Spaces so this function is given a GPU
def generate_response(message, chat_history, system_prompt, temperature, max_tokens):
    """
    Stream tokens from the LLM, removing/replacing internal placeholders so the
    user only sees the final assistant text.
    """
    # Build the conversation for model input
    conversation = [{"role": "system", "content": system_prompt}]
    for user_msg, bot_msg in chat_history:
        # Strip the display-only HTML wrapper from user messages
        plain_user_msg = user_msg.replace('<div class="user-msg">', '').replace('</div>', '')
        conversation.extend([
            {"role": "user", "content": plain_user_msg},
            {"role": "assistant", "content": bot_msg},
        ])
    conversation.append({"role": "user", "content": message})

    # Tokenize using the model's chat template
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Set up streaming generation; the prompt is streamed back too, which is
    # why clean_placeholders() strips the chat-template tags in real time.
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_tokens,
        do_sample=temperature > 0,  # temperature has no effect unless sampling is enabled
        temperature=temperature,
        stopping_criteria=StoppingCriteriaList([StopOnTokens()]),
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    partial_message = ""
    # Wrap the user message in a styled div for display
    styled_user = f'<div class="user-msg">{message}</div>'
    new_history = chat_history + [(styled_user, "")]

    for new_token in streamer:
        partial_message += new_token
        # Format the partial response, removing placeholders in real time
        formatted = format_response(partial_message)
        new_history[-1] = (styled_user, formatted + "▌")
        yield new_history

    # Finalize the message (drop the trailing cursor, placeholders, etc.)
    new_history[-1] = (styled_user, format_response(partial_message))
    yield new_history
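
# Load the model and tokenizer once at startup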
model, tokenizer = initialize_model()

# -------------------------------
# Gradio Interface Layout
# -------------------------------
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="green")) as demo:
    with gr.Column():
        gr.Markdown("""
        <h1 align="center" style="color: var(--primary); font-weight: 800; margin-bottom: 0;">
            🧠 Philosopher AI
        </h1>
        <p align="center" style="color: #666; font-size: 1.1em;">
            Exploring the Depths of Ethical Reasoning and Philosophical Inquiry
        </p>
        """)

    chatbot = gr.Chatbot(label="Dialogue", elem_classes=["gr-chatbot"])

    with gr.Row():
        msg = gr.Textbox(
            label="Your Philosophical Inquiry",
            placeholder="Contemplate your question here...",
            container=False,
            scale=5,
        )
        submit_btn = gr.Button("Ponder ➔", elem_classes="submit-btn", scale=1)

    with gr.Accordion("🛠️ Wisdom Controls", open=False):
        with gr.Row():
            system_prompt = gr.TextArea(
                value=DEFAULT_SYSTEM_PROMPT,
                label="Guiding Principles",
                info="Modify the assistant's foundational reasoning framework",
            )
            with gr.Column():
                temperature = gr.Slider(
                    0, 1, value=0.3,
                    label="Creative Freedom",
                    info="0 = Strict, 1 = Inventive",
                )
                max_tokens = gr.Slider(
                    128, 8192, value=2048,
                    label="Response Depth",
                    step=128,
                )

    gr.Examples(
        examples=question_examples,
        inputs=msg,
        label="🧩 Thought Experiments",
        examples_per_page=3,
    )

    gr.Markdown("""
    <div class="credit">
        Crafted with 🧠 by <a href="https://ruslanmv.com" target="_blank" style="color: var(--primary);">ruslanmv.com</a>
    </div>
    """)

    msg.submit(
        generate_response,
        [msg, chatbot, system_prompt, temperature, max_tokens],
        chatbot,
    )
    submit_btn.click(
        generate_response,
        [msg, chatbot, system_prompt, temperature, max_tokens],
        chatbot,
    )

    clear = gr.Button("Clear Dialogue")
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue().launch()