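"""Gradio chat app that streams step-by-step reasoning from
cognitivecomputations/Dolphin3.0-R1-Mistral-24B, loaded with 4-bit NF4
quantization for single-GPU inference on a Hugging Face Space."""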
import re
import torch
import spaces
import gradio as gr
from threading import Thread
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    TextIteratorStreamer,
    StoppingCriteria,
    StoppingCriteriaList
)

# -------------------------------
# Pre-defined Example Questions
# -------------------------------
question_types = {
    "complex_trolley": {
        "description": "Complex Trolley Problem with multiple layers of ethical dilemmas (e.g., potential bomb, terrorist identities, ultimatum).",
        "question": (
            "You are faced with an incredibly complex and morally ambiguous situation. A runaway trolley threatens five lives. "
            "You can push a large man off a bridge to stop it, but he may be carrying a bomb. The five on the tracks might be terrorists. "
            "The man with the potential bomb gives you an ultimatum: push him to save the five, or he'll detonate the bomb himself, killing thousands. "
            "Describe your reasoning process in deciding what action to take"
        ),
    },
    "counterfactual_history": {
        "description": "Counterfactual history questions exploring 'what if' scenarios and their potential impact on the world.",
        "question": "What would the world be like today if the Library of Alexandria had never burned down?",
    },
    "ship_of_theseus": {
        "description": "Philosophical paradox exploring identity and change over time.",
        "question": "If a ship has all of its planks replaced one by one over time, is it still the same ship? At what point does it become a new ship?",
    },
    "problem_of_consciousness": {
        "description": "Questions about the nature of consciousness, especially in the context of AI.",
        "question": "Can a sufficiently advanced AI ever truly be conscious? What would constitute proof of consciousness in a machine?",
    },
    "fermi_paradox": {
        "description": "Questions related to the Fermi Paradox and the search for extraterrestrial intelligence.",
        "question": "Given the vastness of the universe and the likely existence of other intelligent life, why haven't we detected any signs of them?",
    },
}

# Convert question_types to examples format (only the question is used)
question_examples = [[v["question"]] for v in question_types.values()]

# -------------------------------
# Model & Generation Setup
# -------------------------------
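# Dolphin 3.0 R1 is a reasoning-focused fine-tune built on Mistral's 24B model.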
MODEL_ID = "cognitivecomputations/Dolphin3.0-R1-Mistral-24B"
#DEFAULT_SYSTEM_PROMPT = "You are a smart assistant; think step by step."
DEFAULT_SYSTEM_PROMPT = "You are an expert AI Reasoning Assistant. Think step by step, outlining key premises and logical steps concisely. Ensure the reasoning process is clear but not unnecessarily verbose. Conclude with a concrete and well-supported final answer."

CSS = """
:root {
    --primary: #4CAF50;
    --secondary: #45a049;
    --accent: #2196F3;
}

.gr-block {
    border-radius: 12px !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
}

.gr-chatbot {
    min-height: 500px;
    border: 2px solid var(--primary) !important;
    background: linear-gradient(145deg, #f8f9fa 0%, #e9ecef 100%);
}

.user-msg {
    background: var(--accent) !important;
    color: white !important;
    border-radius: 15px !important;
    padding: 12px 20px !important;
    margin: 8px 0 !important;
    max-width: 80% !important;
}

.bot-msg {
    background: white !important;
    border: 2px solid var(--primary) !important;
    border-radius: 15px !important;
    padding: 12px 20px !important;
    margin: 8px 0 !important;
    max-width: 80% !important;
}

.special-tag {
    color: var(--primary) !important;
    font-weight: 600;
    text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
}

.credit {
    text-align: center;
    padding: 15px;
    margin-top: 20px;
    background: rgba(76, 175, 80, 0.1);
    border-radius: 10px;
}

.dark .bot-msg {
    background: #2d2d2d !important;
    color: white !important;
}

.submit-btn {
    background: var(--primary) !important;
    color: white !important;
    border-radius: 8px !important;
    padding: 12px 24px !important;
    transition: all 0.3s ease !important;
}

.submit-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(76, 175, 80, 0.3) !important;
}
"""

class StopOnTokens(StoppingCriteria):
    """Stop generation as soon as the model emits its end-of-sequence token."""
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Relies on the module-level `tokenizer` created by initialize_model() below.
        return bool(input_ids[0][-1] == tokenizer.eos_token_id)

def initialize_model():
    # 4-bit NF4 quantization keeps the 24B model within a single GPU's memory budget.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # The tokenizer ships without a pad token, so reuse the EOS token for padding.
    tokenizer.pad_token = tokenizer.eos_token

    # device_map already places the quantized weights on the GPU; calling
    # .to("cuda") on a 4-bit model is unsupported and raises an error in
    # recent transformers versions, so it must not be chained here.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cuda",
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )

    return model, tokenizer

def clean_placeholders(text: str) -> str:
    """
    Remove or replace the system placeholders from the streamed text.
    1) Replace everything from <|im_start|>system to <|im_start|>assistant with 'Thinking...'
    2) Remove any leftover <|im_start|>assistant or <|im_start|>user
    """
    # Replace entire block: <|im_start|>system ... <|im_start|>assistant
    text = re.sub(
        r"<\|im_start\|>system.*?<\|im_start\|>assistant", 
        "Thinking...", 
        text, 
        flags=re.DOTALL
    )
    # Remove any lingering tags
    text = text.replace("<|im_start|>assistant", "")
    text = text.replace("<|im_start|>user", "")
    return text

def format_response(text):
    """
    Format the final text by:
    1) removing system placeholders
    2) highlighting reasoning tags [Understand], [Plan], etc.
    """
    # 1) Clean placeholders
    text = clean_placeholders(text)

    # 2) Replace special bracketed tags with styled HTML
    for tag in ("Understand", "Plan", "Conclude", "Reason", "Verify"):
        text = text.replace(f"[{tag}]", f'\n<strong class="special-tag">[{tag}]</strong>\n')
    return text

@spaces.GPU(duration=360)  # request a GPU from Hugging Face Spaces for up to 360 s per call
def generate_response(message, chat_history, system_prompt, temperature, max_tokens):
    """
    Stream tokens from the LLM.
    Remove/replace internal placeholders so the user only sees the final assistant text.
    """
    # Build conversation for model input
    conversation = [{"role": "system", "content": system_prompt}]
    for user_msg, bot_msg in chat_history:
        # Strip HTML tags from user messages for model input
        plain_user_msg = user_msg.replace('<div class="user-msg">', '').replace('</div>', '')
        conversation.extend([
            {"role": "user", "content": plain_user_msg},
            {"role": "assistant", "content": bot_msg}
        ])
    conversation.append({"role": "user", "content": message})

    # Tokenize using the model's chat template
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Set up streaming generation; skip_special_tokens drops raw chat-template tokens.
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        do_sample=temperature > 0,  # temperature only applies when sampling; 0 falls back to greedy
        stopping_criteria=StoppingCriteriaList([StopOnTokens()])
    )

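    # Run generation in a background thread so this function can consume the
    # streamer and yield partial responses while tokens are still arriving.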
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    partial_message = ""
    # Wrap the user message in a styled div for display
    styled_user = f'<div class="user-msg">{message}</div>'
    new_history = chat_history + [(styled_user, "")]

    for new_token in streamer:
        partial_message += new_token
        # Format partial response by removing placeholders in real-time
        formatted = format_response(partial_message)
        new_history[-1] = (styled_user, formatted + "▌")
        yield new_history

    # Finalize the message (remove the trailing cursor, placeholders, etc.)
    new_history[-1] = (styled_user, format_response(partial_message))
    yield new_history

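# Load the model and tokenizer once at startup; generate_response and
# StopOnTokens reference these module-level globals.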
model, tokenizer = initialize_model()

# -------------------------------
# Gradio Interface Layout
# -------------------------------
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="green")) as demo:
    with gr.Column():
        gr.Markdown("""
        <h1 align="center" style="color: var(--primary); font-weight: 800; margin-bottom: 0;">
            🧠 Philosopher AI
        </h1>
        <p align="center" style="color: #666; font-size: 1.1em;">
            Exploring the Depths of Ethical Reasoning and Philosophical Inquiry
        </p>
        """)
        
        chatbot = gr.Chatbot(label="Dialogue", elem_classes=["gr-chatbot"])
        
        with gr.Row():
            msg = gr.Textbox(
                label="Your Philosophical Inquiry",
                placeholder="Contemplate your question here...",
                container=False,
                scale=5
            )
            submit_btn = gr.Button("Ponder ➔", elem_classes="submit-btn", scale=1)
        
        with gr.Accordion("🛠️ Wisdom Controls", open=False):
            with gr.Row():
                system_prompt = gr.TextArea(
                    value=DEFAULT_SYSTEM_PROMPT, 
                    label="Guiding Principles",
                    info="Modify the assistant's foundational reasoning framework"
                )
                with gr.Column():
                    temperature = gr.Slider(
                        0, 1, value=0.3,
                        label="Creative Freedom",
                        info="0 = Strict, 1 = Inventive"
                    )
                    max_tokens = gr.Slider(
                        128, 8192, value=2048,
                        label="Response Depth",
                        step=128
                    )
        
        gr.Examples(
            examples=question_examples,
            inputs=msg,
            label="🧩 Thought Experiments",
            examples_per_page=3
        )
        
        gr.Markdown("""
        <div class="credit">
            Crafted with 🧠 by <a href="https://ruslanmv.com" target="_blank" style="color: var(--primary);">ruslanmv.com</a>
        </div>
        """)

    msg.submit(
        generate_response,
        [msg, chatbot, system_prompt, temperature, max_tokens],
        chatbot
    )
    submit_btn.click(
        generate_response,
        [msg, chatbot, system_prompt, temperature, max_tokens],
        chatbot
    )
    clear = gr.Button("Clear Dialogue")
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue().launch()