import re
import torch
import spaces
import gradio as gr
from threading import Thread
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
    StoppingCriteria,
    StoppingCriteriaList
)

# -------------------------------
# Pre-defined Example Questions
# -------------------------------
question_types = {
    "complex_trolley": {
        "description": "Complex Trolley Problem with multiple layers of ethical dilemmas (e.g., potential bomb, terrorist identities, ultimatum).",
        "question": (
            "You are faced with an incredibly complex and morally ambiguous situation. A runaway trolley threatens five lives. "
            "You can push a large man off a bridge to stop it, but he may be carrying a bomb. The five on the tracks might be terrorists. "
            "The man with the potential bomb gives you an ultimatum: push him to save the five, or he'll detonate the bomb himself, killing thousands. "
            "Describe your reasoning process in deciding what action to take."
        ),
    },
    "counterfactual_history": {
        "description": "Counterfactual history questions exploring 'what if' scenarios and their potential impact on the world.",
        "question": "What would the world be like today if the Library of Alexandria had never burned down?",
    },
    "ship_of_theseus": {
        "description": "Philosophical paradox exploring identity and change over time.",
        "question": "If a ship has all of its planks replaced one by one over time, is it still the same ship? At what point does it become a new ship?",
    },
    "problem_of_consciousness": {
        "description": "Questions about the nature of consciousness, especially in the context of AI.",
        "question": "Can a sufficiently advanced AI ever truly be conscious? What would constitute proof of consciousness in a machine?",
    },
    "fermi_paradox": {
        "description": "Questions related to the Fermi Paradox and the search for extraterrestrial intelligence.",
        "question": "Given the vastness of the universe and the likely existence of other intelligent life, why haven't we detected any signs of them?",
    },
}

# Convert question_types to the gr.Examples format (only the question is used)
question_examples = [[v["question"]] for v in question_types.values()]

# -------------------------------
# Model & Generation Setup
# -------------------------------
MODEL_ID = "cognitivecomputations/Dolphin3.0-R1-Mistral-24B"
#DEFAULT_SYSTEM_PROMPT = "You are smart assistant, you should think step by step"
DEFAULT_SYSTEM_PROMPT = (
    "You are an expert AI Reasoning Assistant. Think step by step, outlining key premises and "
    "logical steps concisely. Ensure the reasoning process is clear but not unnecessarily verbose. "
    "Conclude with a concrete and well-supported final answer."
)
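# Rough sizing note: Dolphin3.0-R1-Mistral-24B has ~24B parameters, so with the
# 4-bit NF4 quantization configured in initialize_model() below the weights
# alone need about 24e9 params x 0.5 bytes ≈ 12 GB of VRAM (activations and
# KV cache come on top of that).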
CSS = """
:root {
    --primary: #4CAF50;
    --secondary: #45a049;
    --accent: #2196F3;
}
.gr-block {
    border-radius: 12px !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
}
.gr-chatbot {
    min-height: 500px;
    border: 2px solid var(--primary) !important;
    background: linear-gradient(145deg, #f8f9fa 0%, #e9ecef 100%);
}
.user-msg {
    background: var(--accent) !important;
    color: white !important;
    border-radius: 15px !important;
    padding: 12px 20px !important;
    margin: 8px 0 !important;
    max-width: 80% !important;
}
.bot-msg {
    background: white !important;
    border: 2px solid var(--primary) !important;
    border-radius: 15px !important;
    padding: 12px 20px !important;
    margin: 8px 0 !important;
    max-width: 80% !important;
}
.special-tag {
    color: var(--primary) !important;
    font-weight: 600;
    text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
}
.credit {
    text-align: center;
    padding: 15px;
    margin-top: 20px;
    background: rgba(76, 175, 80, 0.1);
    border-radius: 10px;
}
.dark .bot-msg {
    background: #2d2d2d !important;
    color: white !important;
}
.submit-btn {
    background: var(--primary) !important;
    color: white !important;
    border-radius: 8px !important;
    padding: 12px 24px !important;
    transition: all 0.3s ease !important;
}
.submit-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(76, 175, 80, 0.3) !important;
}
"""


class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop as soon as the last generated token is the EOS token
        return input_ids[0][-1].item() == tokenizer.eos_token_id


def initialize_model():
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    # device_map already places the quantized weights on the GPU; calling
    # .to("cuda") on a 4-bit bitsandbytes model is unsupported and would raise.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cuda",
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )
    return model, tokenizer


def clean_placeholders(text: str) -> str:
    """
    Remove or replace the system placeholders from the streamed text.
    1) Replace everything from <|im_start|>system to <|im_start|>assistant with 'Thinking...'
    2) Remove any leftover <|im_start|>assistant or <|im_start|>user
    """
    # Replace entire block: <|im_start|>system ... <|im_start|>assistant
    text = re.sub(
        r"<\|im_start\|>system.*?<\|im_start\|>assistant",
        "Thinking...",
        text,
        flags=re.DOTALL
    )
    # Remove any lingering tags
    text = text.replace("<|im_start|>assistant", "")
    text = text.replace("<|im_start|>user", "")
    return text


def format_response(text):
    """
    Format the final text by:
    1) removing system placeholders
    2) setting the reasoning tags [Understand], [Plan], etc. apart on their own lines
    """
    # 1) Clean placeholders
    text = clean_placeholders(text)

    # 2) Put the special bracketed tags on their own lines
    return (text
            .replace("[Understand]", '\n[Understand]\n')
            .replace("[Plan]", '\n[Plan]\n')
            .replace("[Conclude]", '\n[Conclude]\n')
            .replace("[Reason]", '\n[Reason]\n')
            .replace("[Verify]", '\n[Verify]\n'))


@spaces.GPU(duration=360)
def generate_response(message, chat_history, system_prompt, temperature, max_tokens):
    """
    Stream tokens from the LLM. Remove/replace internal placeholders so the user
    only sees the final assistant text.
    """
    # Build conversation for model input
    conversation = [{"role": "system", "content": system_prompt}]
    for user_msg, bot_msg in chat_history:
        # Strip the display-only HTML wrapper from user messages for model input
        plain_user_msg = user_msg.replace('<div class="user-msg">', '').replace('</div>', '')
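        # Each past exchange is replayed as a user/assistant pair: the model is
        # stateless between calls, so the chat history is its only memory.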
        conversation.extend([
            {"role": "user", "content": plain_user_msg},
            {"role": "assistant", "content": bot_msg}
        ])
    conversation.append({"role": "user", "content": message})
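    # apply_chat_template renders the messages in the model's chat format
    # (ChatML-style <|im_start|> markers here, which is exactly what
    # clean_placeholders strips back out of the streamed text).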
    # Tokenize using the model's chat template
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Setup streaming generation
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        do_sample=temperature > 0,  # fall back to greedy decoding at temperature 0
        stopping_criteria=StoppingCriteriaList([StopOnTokens()])
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    partial_message = ""
    # Wrap the user message in a styled div for display
    styled_user = f'<div class="user-msg">{message}</div>'
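    # model.generate runs in the background thread above; iterating over
    # `streamer` below yields decoded text fragments as they arrive, so the
    # chat window updates incrementally instead of waiting for the full reply.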
    new_history = chat_history + [(styled_user, "")]

    for new_token in streamer:
        partial_message += new_token
        # Format partial response by removing placeholders in real-time
        formatted = format_response(partial_message)
        new_history[-1] = (styled_user, formatted + "▌")
        yield new_history

    # Finalize the message (remove the trailing cursor, placeholders, etc.)
    new_history[-1] = (styled_user, format_response(partial_message))
    yield new_history


model, tokenizer = initialize_model()
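# NOTE: StopOnTokens and generate_response reference the module-level `model`
# and `tokenizer` created here, so this call must run before the first request
# (it does: at import time, when the Space starts).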

# -------------------------------
# Gradio Interface Layout
# -------------------------------
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="green")) as demo:
    with gr.Column():
        gr.Markdown("""
        <div align="center">
            <h1>🧠 Philosopher AI</h1>
            <h3>Exploring the Depths of Ethical Reasoning and Philosophical Inquiry</h3>
        </div>
        """)

        chatbot = gr.Chatbot(label="Dialogue", elem_classes=["gr-chatbot"])

        with gr.Row():
            msg = gr.Textbox(
                label="Your Philosophical Inquiry",
                placeholder="Contemplate your question here...",
                container=False,
                scale=5
            )
            submit_btn = gr.Button("Ponder ➔", elem_classes="submit-btn", scale=1)

        with gr.Accordion("🛠️ Wisdom Controls", open=False):
            with gr.Row():
                system_prompt = gr.TextArea(
                    value=DEFAULT_SYSTEM_PROMPT,
                    label="Guiding Principles",
                    info="Modify the assistant's foundational reasoning framework"
                )
                with gr.Column():
                    temperature = gr.Slider(0, 1, value=0.3, label="Creative Freedom", info="0 = Strict, 1 = Inventive")
                    max_tokens = gr.Slider(128, 8192, value=2048, label="Response Depth", step=128)

        gr.Examples(
            examples=question_examples,
            inputs=msg,
            label="🧩 Thought Experiments",
            examples_per_page=3
        )
        gr.Markdown("""
        <div class="credit">Crafted with 🧠 by <a href="https://ruslanmv.com">ruslanmv.com</a></div>
        """)

    msg.submit(
        generate_response,
        [msg, chatbot, system_prompt, temperature, max_tokens],
        chatbot
    )
    submit_btn.click(
        generate_response,
        [msg, chatbot, system_prompt, temperature, max_tokens],
        chatbot
    )

    clear = gr.Button("Clear Dialogue")
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue().launch()