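"""Gradio chat app that streams step-by-step reasoning from
cognitivecomputations/Dolphin3.0-R1-Mistral-24B, loaded with 4-bit NF4
quantization for single-GPU inference on a Hugging Face Space."""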
import re
import torch
import spaces
import gradio as gr
from threading import Thread
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    TextIteratorStreamer,
    StoppingCriteria,
    StoppingCriteriaList
)

# -------------------------------
# Pre-defined Example Questions
# -------------------------------
question_types = {
    "complex_trolley": {
        "description": "Complex Trolley Problem with multiple layers of ethical dilemmas (e.g., potential bomb, terrorist identities, ultimatum).",
        "question": (
            "You are faced with an incredibly complex and morally ambiguous situation. A runaway trolley threatens five lives. "
            "You can push a large man off a bridge to stop it, but he may be carrying a bomb. The five on the tracks might be terrorists. "
            "The man with the potential bomb gives you an ultimatum: push him to save the five, or he'll detonate the bomb himself, killing thousands. "
            "Describe your reasoning process in deciding what action to take"
        ),
    },
    "counterfactual_history": {
        "description": "Counterfactual history questions exploring 'what if' scenarios and their potential impact on the world.",
        "question": "What would the world be like today if the Library of Alexandria had never burned down?",
    },
    "ship_of_theseus": {
        "description": "Philosophical paradox exploring identity and change over time.",
        "question": "If a ship has all of its planks replaced one by one over time, is it still the same ship? At what point does it become a new ship?",
    },
    "problem_of_consciousness": {
        "description": "Questions about the nature of consciousness, especially in the context of AI.",
        "question": "Can a sufficiently advanced AI ever truly be conscious? What would constitute proof of consciousness in a machine?",
    },
    "fermi_paradox": {
        "description": "Questions related to the Fermi Paradox and the search for extraterrestrial intelligence.",
        "question": "Given the vastness of the universe and the likely existence of other intelligent life, why haven't we detected any signs of them?",
    },
}

# Convert question_types to examples format (only the question is used)
question_examples = [[v["question"]] for v in question_types.values()]

# -------------------------------
# Model & Generation Setup
# -------------------------------
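# Dolphin 3.0 R1 is a reasoning-focused fine-tune built on Mistral's 24B model.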
MODEL_ID = "cognitivecomputations/Dolphin3.0-R1-Mistral-24B"
#DEFAULT_SYSTEM_PROMPT = "You are a smart assistant; think step by step."
DEFAULT_SYSTEM_PROMPT = "You are an expert AI Reasoning Assistant. Think step by step, outlining key premises and logical steps concisely. Ensure the reasoning process is clear but not unnecessarily verbose. Conclude with a concrete and well-supported final answer."

CSS = """
:root {
    --primary: #4CAF50;
    --secondary: #45a049;
    --accent: #2196F3;
}

.gr-block {
    border-radius: 12px !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
}

.gr-chatbot {
    min-height: 500px;
    border: 2px solid var(--primary) !important;
    background: linear-gradient(145deg, #f8f9fa 0%, #e9ecef 100%);
}

.user-msg {
    background: var(--accent) !important;
    color: white !important;
    border-radius: 15px !important;
    padding: 12px 20px !important;
    margin: 8px 0 !important;
    max-width: 80% !important;
}

.bot-msg {
    background: white !important;
    border: 2px solid var(--primary) !important;
    border-radius: 15px !important;
    padding: 12px 20px !important;
    margin: 8px 0 !important;
    max-width: 80% !important;
}

.special-tag {
    color: var(--primary) !important;
    font-weight: 600;
    text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
}

.credit {
    text-align: center;
    padding: 15px;
    margin-top: 20px;
    background: rgba(76, 175, 80, 0.1);
    border-radius: 10px;
}

.dark .bot-msg {
    background: #2d2d2d !important;
    color: white !important;
}

.submit-btn {
    background: var(--primary) !important;
    color: white !important;
    border-radius: 8px !important;
    padding: 12px 24px !important;
    transition: all 0.3s ease !important;
}

.submit-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(76, 175, 80, 0.3) !important;
}
"""

class StopOnTokens(StoppingCriteria):
    """Stop generation as soon as the model emits its end-of-sequence token."""
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Relies on the module-level `tokenizer` created by initialize_model() below.
        return bool(input_ids[0][-1] == tokenizer.eos_token_id)

def initialize_model():
    # 4-bit NF4 quantization keeps the 24B model within a single GPU's memory budget.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # The tokenizer ships without a pad token, so reuse the EOS token for padding.
    tokenizer.pad_token = tokenizer.eos_token

    # device_map already places the quantized weights on the GPU; calling
    # .to("cuda") on a 4-bit model is unsupported and raises an error in
    # recent transformers versions, so it must not be chained here.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cuda",
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True
    )

    return model, tokenizer

def clean_placeholders(text: str) -> str:
    """
    Remove or replace the system placeholders from the streamed text.
    1) Replace everything from <|im_start|>system to <|im_start|>assistant with 'Thinking...'
    2) Remove any leftover <|im_start|>assistant or <|im_start|>user
    """
    # Replace entire block: <|im_start|>system ... <|im_start|>assistant
    text = re.sub(
        r"<\|im_start\|>system.*?<\|im_start\|>assistant", 
        "Thinking...", 
        text, 
        flags=re.DOTALL
    )
    # Remove any lingering tags
    text = text.replace("<|im_start|>assistant", "")
    text = text.replace("<|im_start|>user", "")
    return text

def format_response(text):
    """
    Format the final text by:
    1) removing system placeholders
    2) highlighting reasoning tags [Understand], [Plan], etc.
    """
    # 1) Clean placeholders
    text = clean_placeholders(text)

    # 2) Replace special bracketed tags with styled HTML
    for tag in ("Understand", "Plan", "Conclude", "Reason", "Verify"):
        text = text.replace(f"[{tag}]", f'\n<strong class="special-tag">[{tag}]</strong>\n')
    return text

@spaces.GPU(duration=360)  # request a GPU from Hugging Face Spaces for up to 360 s per call
def generate_response(message, chat_history, system_prompt, temperature, max_tokens):
    """
    Stream tokens from the LLM.
    Remove/replace internal placeholders so the user only sees the final assistant text.
    """
    # Build conversation for model input
    conversation = [{"role": "system", "content": system_prompt}]
    for user_msg, bot_msg in chat_history:
        # Strip HTML tags from user messages for model input
        plain_user_msg = user_msg.replace('<div class="user-msg">', '').replace('</div>', '')
        conversation.extend([
            {"role": "user", "content": plain_user_msg},
            {"role": "assistant", "content": bot_msg}
        ])
    conversation.append({"role": "user", "content": message})

    # Tokenize using the model's chat template
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Set up streaming generation; skip_special_tokens drops raw chat-template tokens.
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        do_sample=temperature > 0,  # temperature only applies when sampling; 0 falls back to greedy
        stopping_criteria=StoppingCriteriaList([StopOnTokens()])
    )

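    # Run generation in a background thread so this function can consume the
    # streamer and yield partial responses while tokens are still arriving.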
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    partial_message = ""
    # Wrap the user message in a styled div for display
    styled_user = f'<div class="user-msg">{message}</div>'
    new_history = chat_history + [(styled_user, "")]

    for new_token in streamer:
        partial_message += new_token
        # Format partial response by removing placeholders in real-time
        formatted = format_response(partial_message)
        new_history[-1] = (styled_user, formatted + "▌")
        yield new_history

    # Finalize the message (remove the trailing cursor, placeholders, etc.)
    new_history[-1] = (styled_user, format_response(partial_message))
    yield new_history

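# Load the model and tokenizer once at startup; generate_response and
# StopOnTokens reference these module-level globals.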
model, tokenizer = initialize_model()

# -------------------------------
# Gradio Interface Layout
# -------------------------------
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="green")) as demo:
    with gr.Column():
        gr.Markdown("""
        <h1 align="center" style="color: var(--primary); font-weight: 800; margin-bottom: 0;">
            🧠 Philosopher AI
        </h1>
        <p align="center" style="color: #666; font-size: 1.1em;">
            Exploring the Depths of Ethical Reasoning and Philosophical Inquiry
        </p>
        """)
        
        chatbot = gr.Chatbot(label="Dialogue", elem_classes=["gr-chatbot"])
        
        with gr.Row():
            msg = gr.Textbox(
                label="Your Philosophical Inquiry",
                placeholder="Contemplate your question here...",
                container=False,
                scale=5
            )
            submit_btn = gr.Button("Ponder ➔", elem_classes="submit-btn", scale=1)
        
        with gr.Accordion("🛠️ Wisdom Controls", open=False):
            with gr.Row():
                system_prompt = gr.TextArea(
                    value=DEFAULT_SYSTEM_PROMPT, 
                    label="Guiding Principles",
                    info="Modify the assistant's foundational reasoning framework"
                )
                with gr.Column():
                    temperature = gr.Slider(
                        0, 1, value=0.3,
                        label="Creative Freedom",
                        info="0 = Strict, 1 = Inventive"
                    )
                    max_tokens = gr.Slider(
                        128, 8192, value=2048,
                        label="Response Depth",
                        step=128
                    )
        
        gr.Examples(
            examples=question_examples,
            inputs=msg,
            label="🧩 Thought Experiments",
            examples_per_page=3
        )
        
        gr.Markdown("""
        <div class="credit">
            Crafted with 🧠 by <a href="https://ruslanmv.com" target="_blank" style="color: var(--primary);">ruslanmv.com</a>
        </div>
        """)

    msg.submit(
        generate_response,
        [msg, chatbot, system_prompt, temperature, max_tokens],
        chatbot
    )
    submit_btn.click(
        generate_response,
        [msg, chatbot, system_prompt, temperature, max_tokens],
        chatbot
    )
    clear = gr.Button("Clear Dialogue")
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.queue().launch()