import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import gradio as gr

# Load model + tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Generate response with visible prompt/response formatting
def generate_response(prompt, max_length=100):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_length=len(inputs["input_ids"][0]) + max_length,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.9,
        top_p=0.95,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()


# Cosine similarity to estimate ΔS
def similarity(a, b):
    tok_a = tokenizer(a, return_tensors="pt").to(device)
    tok_b = tokenizer(b, return_tensors="pt").to(device)
    with torch.no_grad():
        emb_a = model.transformer.wte(tok_a.input_ids).mean(dim=1)
        emb_b = model.transformer.wte(tok_b.input_ids).mean(dim=1)
    return float(cosine_similarity(emb_a.cpu().numpy(), emb_b.cpu().numpy())[0][0])


# Main loop: identity unfolding
def identity_unfolding(n_steps):
    unfolding = []
    ΔS_trace = []
    log = []

    current_prompt = "The following is a system thinking about itself:\n"

    for step in range(n_steps):
        log.append(f"--- Step {step} ---")
        log.append(f"[Prompt to GPT-2]:\n{current_prompt}")

        response = generate_response(current_prompt)
        unfolding.append(response)
        log.append(f"[GPT-2 Response]:\n{response}")

        if step > 0:
            ΔS = similarity(unfolding[step - 1], unfolding[step])
            ΔS_trace.append(round(ΔS, 4))
            log.append(f"ΔS({step - 1} → {step}) = {round(ΔS, 4)}\n")
        else:
            log.append("ΔS not applicable for first step.\n")

        current_prompt = (
            f'The system has previously stated:\n"{response}"\n'
            "Now it continues thinking about what that implies:\n"
        )

    summary = "\n".join(log)
    trace_summary = "\n".join(
        [f"ΔS({i} → {i+1}) = {ΔS_trace[i]}" for i in range(len(ΔS_trace))]
    )
    return summary, trace_summary


# Gradio interface
iface = gr.Interface(
    fn=identity_unfolding,
    inputs=gr.Slider(2, 10, value=5, step=1, label="Number of Identity Iterations"),
    outputs=[
        gr.Textbox(label="Full Trace (Prompts + GPT-2 Outputs)", lines=25),
        gr.Textbox(label="ΔS Semantic Similarity Trace", lines=10),
    ],
    title="GPT-2 Identity Emergence Analyzer (EAL Framework)",
    description=(
        "This app tests whether GPT-2 can recursively reflect on its own outputs. "
        "It uses prompt-based recursion and cosine similarity (ΔS) to measure semantic stability across iterations. "
        "A stabilizing identity shows high ΔS values close to 1.0 across iterations."
    ),
)

if __name__ == "__main__":
    iface.launch()
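
# --- Optional: headless usage (sketch) ---
# The Gradio UI is not required to exercise the loop; identity_unfolding() can be
# called directly, e.g. from a REPL or another script that imports this module.
# A minimal sketch, using only the names defined above:
#
#     summary, trace = identity_unfolding(3)
#     print(trace)   # one "ΔS(i → i+1) = ..." line per transition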