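# NOTE: the original first lines read "Spaces: / Sleeping / Sleeping" -- presumably
# page-header residue from where this script was copied; not part of the program.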
import base64
import io

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import seaborn as sns
import torch
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModelForCausalLM, AutoTokenizer
# --- Model / runtime configuration -------------------------------------------
# Checkpoint used for both the tokenizer and the causal language model.
model_name = "EleutherAI/gpt-neo-1.3B"

# Use the GPU when torch reports one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
model.eval()  # inference only

# Prompt budget (in tokens) applied by trim_prompt, and the number of extra
# tokens generate_response allows on top of the prompt length.
max_tokens = 900
max_gen_length = 100

# Messages collected by debug(); shown in the UI's "Debug Log" box.
debug_log = []
def debug(msg):
    """Print *msg* and record its string form in the module-level ``debug_log``."""
    print(msg)
    debug_log.append(str(msg))
def trim_prompt(prompt, max_tokens=max_tokens):
    """Return *prompt* limited to its last *max_tokens* tokens.

    The prompt is encoded without special tokens; if it is longer than
    *max_tokens*, a debug message is logged and only the trailing tokens are
    kept. The result is always the decoded token sequence, so even an
    untrimmed prompt goes through an encode/decode round trip.
    """
    token_ids = tokenizer.encode(prompt, add_special_tokens=False)
    token_count = len(token_ids)
    if token_count > max_tokens:
        debug(f"[!] Trimming prompt from {token_count} to {max_tokens} tokens.")
        # Keep the tail of the prompt (the most recent text).
        token_ids = token_ids[-max_tokens:]
    return tokenizer.decode(token_ids)
def generate_response(prompt):
    """Sample a continuation of *prompt* from the model and return the decoded text.

    The prompt is first passed through ``trim_prompt``. The returned string is
    the decoding of the full output sequence produced by ``model.generate``
    (special tokens skipped, surrounding whitespace stripped). Any exception
    raised while generating or decoding is logged via ``debug`` and the
    placeholder ``"[Generation failed]"`` is returned instead.
    """
    prompt = trim_prompt(prompt)
    debug(f"Generating response for prompt:\n{prompt}")
    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    try:
        prompt_length = encoded["input_ids"].shape[1]
        # Allow max_gen_length tokens beyond the prompt, capped at 1024 in total.
        length_cap = min(prompt_length + max_gen_length, 1024)
        sampling_options = {"do_sample": True, "temperature": 0.9, "top_p": 0.95}
        generated = model.generate(
            **encoded,
            max_length=length_cap,
            pad_token_id=tokenizer.eos_token_id,
            **sampling_options,
        )
        text = tokenizer.decode(generated[0], skip_special_tokens=True).strip()
        debug(f"Response:\n{text}")
        return text
    except Exception as exc:
        # Best-effort: the caller receives a placeholder rather than an exception.
        debug(f"Error during generation: {exc}")
        return "[Generation failed]"
def similarity(a, b):
    """Return the cosine similarity of the mean token embeddings of *a* and *b*.

    Each text is tokenized, its token ids are looked up in the model's
    ``transformer.wte`` embedding table, and the embeddings are averaged over
    dim 1. If either text is empty or whitespace-only the result is ``0.0``
    and the model is not consulted.
    """
    if not (a.strip() and b.strip()):
        return 0.0

    encoded_pair = [tokenizer(text, return_tensors="pt").to(device) for text in (a, b)]
    with torch.no_grad():
        pooled = [
            model.transformer.wte(encoded.input_ids).mean(dim=1)
            for encoded in encoded_pair
        ]
    vec_a, vec_b = (vector.cpu().numpy() for vector in pooled)
    score = cosine_similarity(vec_a, vec_b)
    return float(score[0][0])
def make_heatmap(matrix, title):
    """Render *matrix* as an annotated heatmap and return it as base64 PNG text.

    Args:
        matrix: 2-D array-like of values passed to ``sns.heatmap``.
        title: Title drawn above the plot.

    Returns:
        The PNG image bytes, base64-encoded and decoded to ``str``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(matrix, annot=True, cmap="coolwarm", ax=ax)
        ax.set_title(title)
        # Operate on this figure explicitly instead of pyplot's implicit
        # "current figure", which is shared global state.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode()
def build_similarity_graph(texts, threshold=0.90):
    """Build an undirected graph linking texts whose similarity exceeds *threshold*.

    Args:
        texts: Iterable of strings. The text at position ``i`` is represented
            by the node ``"T{i}"``.
        threshold: An edge is added only when ``similarity`` is strictly
            greater than this value (default ``0.90``).

    Returns:
        An ``nx.Graph`` whose edges carry the score in the ``weight``
        attribute. Only nodes that take part in at least one edge are present.
    """
    texts = list(texts)  # allow any iterable; we need indexing and slicing
    graph = nx.Graph()
    for i, text_i in enumerate(texts):
        # Visit each unordered pair exactly once (j > i).
        for j, text_j in enumerate(texts[i + 1:], start=i + 1):
            score = similarity(text_i, text_j)
            if score > threshold:
                graph.add_edge(f"T{i}", f"T{j}", weight=score)
    return graph
def get_embeddings(texts):
    """Return one mean-pooled token-embedding vector per text as a NumPy array.

    Each text is tokenized with truncation enabled, its token ids are looked
    up in the model's ``transformer.wte`` embedding table, and the result is
    averaged over dim 1. An empty *texts* yields an empty array.
    """

    def _pooled_vector(text):
        # Embed one text and drop the leading batch dimension.
        encoded = tokenizer(text, return_tensors='pt', truncation=True).to(device)
        pooled = model.transformer.wte(encoded.input_ids).mean(dim=1)
        return pooled.cpu().numpy()[0]

    with torch.no_grad():
        return np.array([_pooled_vector(text) for text in texts])
def cluster_texts(texts, n_clusters=2):
    """Assign each text to a k-means cluster of its embedding.

    Args:
        texts: Iterable of strings to cluster.
        n_clusters: Desired number of clusters. It is reduced to the number
            of texts when fewer texts than clusters are supplied.

    Returns:
        A NumPy array with one integer cluster label per text, in input
        order. Empty input yields an empty integer array.
    """
    texts = list(texts)
    if not texts:
        # Nothing to cluster; KMeans cannot be fitted on zero samples.
        return np.array([], dtype=int)

    embs = get_embeddings(texts)
    # KMeans rejects n_clusters greater than the number of samples.
    k = min(n_clusters, len(texts))
    # Fixed seed and explicit n_init so repeated runs give the same labels
    # and the result does not depend on the library's default n_init.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0)
    return kmeans.fit_predict(embs)
def _similarity_matrix(texts):
    """Return the square matrix of pairwise ``similarity`` scores for *texts*."""
    size = len(texts)
    matrix = np.zeros((size, size))
    for i, text_i in enumerate(texts):
        # Cosine similarity is symmetric: score the diagonal and upper
        # triangle once and mirror each value into the lower triangle.
        for j in range(i, size):
            matrix[i, j] = matrix[j, i] = similarity(text_i, texts[j])
    return matrix


def dual_identity_unfolding(n_steps):
    """Run the identity / counter-identity generation loop and summarise it.

    At every step the model is asked to elaborate the current identity state
    and, separately, to refute it. The next identity state is built from the
    elaboration just produced. After the loop, all generated texts are
    compared pairwise, plotted as a heatmap and clustered.

    Args:
        n_steps: Number of steps to run. Converted with ``int()`` because a
            UI slider may deliver the value as a float; must be at least 1.

    Returns:
        A 5-tuple of strings:
        ``(identity_trace, contradiction_trace, similarity_trace, debug_log,
        heatmap_html)``, where ``heatmap_html`` is an ``<img>`` tag embedding
        the heatmap as a base64 PNG.

    Raises:
        ValueError: If ``n_steps`` is less than 1.
    """
    n_steps = int(n_steps)
    if n_steps < 1:
        raise ValueError("n_steps must be at least 1")

    debug_log.clear()
    identity_trace, negation_trace = [], []
    delta_identity, delta_negation, delta_cross = [], [], []
    identity_state = "The system reflects: 'I am...'"

    for step in range(n_steps):
        debug(f"\n=== Step {step} ===")
        identity_prompt = identity_state + "\nElaborate this claim."
        # The refutation prompt is built from the same identity state.
        negation_prompt = (
            f"Refute or challenge the claim: \"{identity_state}\"\n"
            "Present a fundamental contradiction."
        )
        identity_text = generate_response(identity_prompt)
        negation_text = generate_response(negation_prompt)
        identity_trace.append(identity_text)
        negation_trace.append(negation_text)
        identity_state = "Earlier it stated: " + identity_text

        if step > 0:
            # Step-to-step drift within each trace.
            delta_identity.append(
                round(similarity(identity_trace[-2], identity_trace[-1]), 4)
            )
            delta_negation.append(
                round(similarity(negation_trace[-2], negation_trace[-1]), 4)
            )
        else:
            # No previous step to compare against.
            delta_identity.append(None)
            delta_negation.append(None)
        # Similarity between the two traces at the current step.
        delta_cross.append(round(similarity(identity_text, negation_text), 4))

    all_texts = identity_trace + negation_trace
    heatmap = make_heatmap(_similarity_matrix(all_texts), "Similarity Matrix (I ∪ ¬I)")
    clusters = cluster_texts(all_texts)

    delta_out = "\n".join(
        f"Step {i}: ΔS(I)={d_identity} ΔS(¬I)={d_negation} ΔS Cross={d_cross}"
        for i, (d_identity, d_negation, d_cross) in enumerate(
            zip(delta_identity, delta_negation, delta_cross)
        )
    )
    # Cluster labels follow all_texts order: identity texts first, then negations.
    offset = len(identity_trace)
    identity_out = "\n\n".join(
        f"I{i} [C{clusters[i]}]: {text}" for i, text in enumerate(identity_trace)
    )
    negation_out = "\n\n".join(
        f"¬I{i} [C{clusters[offset + i]}]: {text}"
        for i, text in enumerate(negation_trace)
    )
    debug_output = "\n".join(debug_log)
    img_html = f"<img src='data:image/png;base64,{heatmap}'/>"
    return identity_out, negation_out, delta_out, debug_output, img_html
# --- Gradio UI wiring ---------------------------------------------------------
# Single input: how many steps dual_identity_unfolding should run.
_step_slider = gr.Slider(2, 10, value=5, step=1, label="Number of Steps")

# One output component per element of dual_identity_unfolding's return tuple.
_output_components = [
    gr.Textbox(label="Identity Trace (Iₙ)", lines=15),
    gr.Textbox(label="Contradiction Trace (¬Iₙ)", lines=15),
    gr.Textbox(label="ΔS Similarity Trace", lines=8),
    gr.Textbox(label="Debug Log", lines=10),
    gr.HTML(label="Similarity Heatmap"),
]

iface = gr.Interface(
    fn=dual_identity_unfolding,
    inputs=_step_slider,
    outputs=_output_components,
    title="GPT Identity Analyzer + Antithesis (EAL Mode)",
    description="Analyzes the self-consistency and contradiction emergence in GPT-Neo using EAL-inspired fixed-point tracing, clustering, and cosine similarity.",
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()