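"""GPT-2 Identity Emergence Analyzer (EAL Framework).

Gradio app that tests whether GPT-2 can recursively reflect on its own
outputs: each iteration feeds the previous response back in as a prompt,
and cosine similarity (ΔS) between consecutive outputs measures semantic
stability across iterations.
"""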
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
# Load GPT-2 and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Debug log list
debug_log = []
def debug(msg):
    print(msg)  # Console log (local)
    debug_log.append(str(msg))  # Collect for UI
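
# Note: debug_log is module-level state, so concurrent Gradio sessions would
# share (and clobber) one another's logs; fine for a single-user demo.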
# Generate a GPT-2 response
def generate_response(prompt, max_length=100):
debug(f"Generating response for prompt:\n{prompt}")
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(
**inputs,
max_length=len(inputs["input_ids"][0]) + max_length,
pad_token_id=tokenizer.eos_token_id,
do_sample=True,
temperature=0.9,
top_p=0.95,
)
result = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
debug(f"Generated output:\n{result}")
return result
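
# Sampling (do_sample=True) makes each run non-deterministic, so ΔS traces
# vary between runs; calling torch.manual_seed(...) beforehand would make a
# run reproducible if that is desired.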
# Compute cosine similarity of mean token embeddings
def similarity(a, b):
    tok_a = tokenizer(a, return_tensors="pt").to(device)
    tok_b = tokenizer(b, return_tensors="pt").to(device)
    with torch.no_grad():
        # Mean-pool the static input embeddings (wte) over each sequence
        emb_a = model.transformer.wte(tok_a.input_ids).mean(dim=1)
        emb_b = model.transformer.wte(tok_b.input_ids).mean(dim=1)
    score = float(cosine_similarity(emb_a.cpu().numpy(), emb_b.cpu().numpy())[0][0])
    debug(f"Similarity between outputs: {score}")
    return score
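
# ΔS near 1.0 means consecutive outputs stay semantically close (stable);
# lower values indicate drift. Mean-pooled static embeddings are a coarse
# proxy for meaning, since they ignore word order and context.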
# Main identity unfolding loop
def identity_unfolding(n_steps):
    unfolding = []
    ΔS_trace = []
    log = []
    debug_log.clear()
    current_prompt = "The following is a system thinking about itself:\n"
    # Gradio sliders can deliver floats; coerce so range() accepts the value
    for step in range(int(n_steps)):
        log.append(f"--- Step {step} ---")
        log.append(f"[Prompt to GPT-2]:\n{current_prompt}")
        response = generate_response(current_prompt)
        unfolding.append(response)
        log.append(f"[GPT-2 Response]:\n{response}")
        if step > 0:
            ΔS = similarity(unfolding[step - 1], unfolding[step])
            ΔS_trace.append(round(ΔS, 4))
            log.append(f"ΔS({step - 1} → {step}) = {round(ΔS, 4)}\n")
        else:
            log.append("ΔS not applicable for first step.\n")
        # Feed the latest response back in as the next prompt
        current_prompt = (
            f'The system has previously stated:\n"{response}"\n'
            "Now it continues thinking about what that implies:\n"
        )
    summary = "\n".join(log)
    trace_summary = "\n".join(
        f"ΔS({i} → {i + 1}) = {ΔS_trace[i]}" for i in range(len(ΔS_trace))
    )
    debug_output = "\n".join(debug_log)
    return summary, trace_summary, debug_output
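
# Example (direct call, bypassing the UI):
#   summary, trace, dbg = identity_unfolding(5)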
# Gradio interface
iface = gr.Interface(
    fn=identity_unfolding,
    inputs=gr.Slider(2, 10, value=5, step=1, label="Number of Identity Iterations"),
    outputs=[
        gr.Textbox(label="Full Trace (Prompts + GPT-2 Outputs)", lines=25),
        gr.Textbox(label="ΔS Semantic Similarity Trace", lines=10),
        gr.Textbox(label="Debug Log", lines=10),
    ],
    title="GPT-2 Identity Emergence Analyzer (EAL Framework)",
    description=(
        "This app tests whether GPT-2 can recursively reflect on its own outputs. "
        "It uses prompt-based recursion and cosine similarity (ΔS) to measure "
        "semantic stability across iterations. A visible debug log is included."
    ),
)
if __name__ == "__main__":
    iface.launch()