import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import io
import base64

model_name = "EleutherAI/gpt-neo-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

max_tokens = 900       # prompt budget (tokens); keeps prompt + generation under the 1024 cap below
max_gen_length = 100   # new tokens generated per step
debug_log = []


def debug(msg):
    """Print a message and keep it for the UI's debug pane."""
    print(msg)
    debug_log.append(str(msg))


def trim_prompt(prompt, max_tokens=max_tokens):
    """Keep only the most recent max_tokens tokens of the prompt."""
    tokens = tokenizer.encode(prompt, add_special_tokens=False)
    if len(tokens) > max_tokens:
        debug(f"[!] Trimming prompt from {len(tokens)} to {max_tokens} tokens.")
        tokens = tokens[-max_tokens:]
    return tokenizer.decode(tokens)


def generate_response(prompt):
    prompt = trim_prompt(prompt)
    debug(f"Generating response for prompt:\n{prompt}")
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    try:
        outputs = model.generate(
            **inputs,
            max_length=min(len(inputs["input_ids"][0]) + max_gen_length, 1024),
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            temperature=0.9,
            top_p=0.95,
        )
        result = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        debug(f"Response:\n{result}")
        return result
    except Exception as e:
        debug(f"Error during generation: {e}")
        return "[Generation failed]"


def similarity(a, b):
    """Cosine similarity between mean-pooled input-token embeddings (wte).

    Note: this is a static-embedding measure; it averages the model's input
    embedding vectors and ignores word order and context.
    """
    if not a.strip() or not b.strip():
        return 0.0
    tok_a = tokenizer(a, return_tensors="pt").to(device)
    tok_b = tokenizer(b, return_tensors="pt").to(device)
    with torch.no_grad():
        emb_a = model.transformer.wte(tok_a.input_ids).mean(dim=1)
        emb_b = model.transformer.wte(tok_b.input_ids).mean(dim=1)
    return float(cosine_similarity(emb_a.cpu().numpy(), emb_b.cpu().numpy())[0][0])


def make_heatmap(matrix, title):
    """Render a similarity matrix as a PNG and return it base64-encoded."""
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(matrix, annot=True, cmap="coolwarm", ax=ax)
    ax.set_title(title)
    buf = io.BytesIO()
    plt.tight_layout()
    plt.savefig(buf, format="png")
    plt.close(fig)
    buf.seek(0)
    return base64.b64encode(buf.read()).decode()


def build_similarity_graph(texts):
    """Optional helper (not wired into the UI): edges connect texts whose
    similarity exceeds 0.90."""
    G = nx.Graph()
    for i, text_i in enumerate(texts):
        for j, text_j in enumerate(texts):
            if i < j:
                sim = similarity(text_i, text_j)
                if sim > 0.90:
                    G.add_edge(f"T{i}", f"T{j}", weight=sim)
    return G


def get_embeddings(texts):
    with torch.no_grad():
        embeddings = []
        for t in texts:
            ids = tokenizer(t, return_tensors="pt", truncation=True).to(device)
            emb = model.transformer.wte(ids.input_ids).mean(dim=1)
            embeddings.append(emb.cpu().numpy()[0])
    return np.array(embeddings)


def cluster_texts(texts, n_clusters=2):
    embs = get_embeddings(texts)
    # Explicit n_init and a fixed seed for reproducible, version-stable clustering.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
    labels = kmeans.fit_predict(embs)
    return labels


def dual_identity_unfolding(n_steps):
    n_steps = int(n_steps)  # Gradio sliders may deliver a float
    I_trace, not_I_trace = [], []
    ΔS_I, ΔS_not_I, ΔS_cross = [], [], []
    debug_log.clear()
    I_state = "The system reflects: 'I am...'"
    # not_I_state is kept for symmetry; the antithesis prompt below is
    # rebuilt from I_state at every step.
    not_I_state = "Explain why the claim 'I am...' might be false."
    for step in range(n_steps):
        debug(f"\n=== Step {step} ===")
        I_prompt = I_state + "\nElaborate this claim."
        not_I_prompt = (
            f'Refute or challenge the claim: "{I_state}"\n'
            "Present a fundamental contradiction."
        )
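        # Generate this step's identity claim and its antithesis, then feed the
        # new claim back in as the next step's state. The ΔS values below
        # compare consecutive steps within each trace; ΔS Cross compares I
        # against ¬I at the same step.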
        I = generate_response(I_prompt)
        not_I = generate_response(not_I_prompt)
        I_trace.append(I)
        not_I_trace.append(not_I)
        I_state = "Earlier it stated: " + I
        not_I_state = "Counterclaim to: " + I
        if step > 0:
            ΔS_I.append(round(similarity(I_trace[-2], I_trace[-1]), 4))
            ΔS_not_I.append(round(similarity(not_I_trace[-2], not_I_trace[-1]), 4))
            ΔS_cross.append(round(similarity(I_trace[-1], not_I_trace[-1]), 4))
        else:
            # No previous step to compare against, so within-trace ΔS is undefined.
            ΔS_I.append(None)
            ΔS_not_I.append(None)
            ΔS_cross.append(round(similarity(I_trace[-1], not_I_trace[-1]), 4))

    # Full pairwise similarity over the combined identity and antithesis texts.
    all_texts = I_trace + not_I_trace
    sim_matrix = np.zeros((len(all_texts), len(all_texts)))
    for i in range(len(all_texts)):
        for j in range(len(all_texts)):
            sim_matrix[i][j] = similarity(all_texts[i], all_texts[j])

    heatmap = make_heatmap(sim_matrix, "Similarity Matrix (I ∪ ¬I)")
    clusters = cluster_texts(all_texts)

    ΔS_out = "\n".join(
        f"Step {i}: ΔS(I)={ΔS_I[i]}  ΔS(¬I)={ΔS_not_I[i]}  ΔS Cross={ΔS_cross[i]}"
        for i in range(n_steps)
    )
    I_out = "\n\n".join(f"I{i} [C{clusters[i]}]: {t}" for i, t in enumerate(I_trace))
    not_I_out = "\n\n".join(
        f"¬I{i} [C{clusters[len(I_trace) + i]}]: {t}" for i, t in enumerate(not_I_trace)
    )
    debug_output = "\n".join(debug_log)
    # Inline <img> tag embedding the base64 PNG for the gr.HTML output below.
    img_html = f'<img src="data:image/png;base64,{heatmap}" alt="Similarity heatmap"/>'
    return I_out, not_I_out, ΔS_out, debug_output, img_html


iface = gr.Interface(
    fn=dual_identity_unfolding,
    inputs=gr.Slider(2, 10, value=5, step=1, label="Number of Steps"),
    outputs=[
        gr.Textbox(label="Identity Trace (Iₙ)", lines=15),
        gr.Textbox(label="Contradiction Trace (¬Iₙ)", lines=15),
        gr.Textbox(label="ΔS Similarity Trace", lines=8),
        gr.Textbox(label="Debug Log", lines=10),
        gr.HTML(label="Similarity Heatmap"),
    ],
    title="GPT Identity Analyzer + Antithesis (EAL Mode)",
    description=(
        "Analyzes self-consistency and contradiction emergence in GPT-Neo "
        "using EAL-inspired fixed-point tracing, clustering, and cosine similarity."
    ),
)

if __name__ == "__main__":
    iface.launch()
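# Usage sketch (the file name is assumed; adjust to your checkout):
#   python eal_identity_analyzer.py
# Gradio serves the UI locally (default http://127.0.0.1:7860). For a headless
# smoke test without the UI, call the core loop directly:
#   I_out, not_I_out, dS_out, log, img = dual_identity_unfolding(3)
# Note: the 1.3B model needs roughly 5-6 GB of memory in fp32; CPU runs work
# but generation is slow.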