neuralworm committed on
Commit 12da3d7 · 1 Parent(s): fcc55bd

update app.py, update requirements.txt

Files changed (2)
  1. app.py +102 -136
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,17 +1,12 @@
  ###############################################################################
- # app.py – EAL Emergent-Discourse Analyzer (v0.8 • multi-model, VRAM-safe)
  ###############################################################################
  import gc, io, json, re, time, base64
  import torch, numpy as np, matplotlib, matplotlib.pyplot as plt, seaborn as sns
  import gradio as gr
  from sklearn.metrics.pairwise import cosine_similarity
  from sklearn.cluster import KMeans
- from transformers import AutoTokenizer, AutoModelForCausalLM
-
- # ▸▸ force the right SDPA backend for GPUs < SM80
- torch.backends.cuda.enable_flash_sdp(False)
- torch.backends.cuda.enable_math_sdp(False)
- torch.backends.cuda.enable_mem_efficient_sdp(True)

  matplotlib.use("Agg")  # headless
 
@@ -19,206 +14,177 @@ matplotlib.use("Agg") # headless
  # 1 · Registry of models
  # ──────────────────────────────────────────────────────────────────────────────
  AVAILABLE_MODELS = {
-     "GPT-Neox-1.3B" : "EleutherAI/gpt-neo-1.3B",
-     "GPT-2"         : "gpt2",
-     "Gemma-3-1B-IT" : "google/gemma-3-1b-it",  # float-16 branch used below
  }

- _loaded  = {}    # name → {tok, model, ctx, dev}
- _current = None  # active name
-
- # debug log (full prompts + answers)
  dbg_log: list[str] = []
  def dbg(msg: str) -> None:
-     stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-     line  = f"[{stamp}] {msg}"
      dbg_log.append(line)
      print(line)

  # ──────────────────────────────────────────────────────────────────────────────
- # 2 · Loader / Unloader helpers
  # ──────────────────────────────────────────────────────────────────────────────
  def _unload_current():
-     """Move old model to CPU & free CUDA VRAM."""
      global _current
      if _current and _current in _loaded:
-         mdl = _loaded[_current]["model"]
-         mdl.to("cpu")
-         del mdl
-         torch.cuda.empty_cache()
-         gc.collect()
      _current = None

  def _load(name: str):
-     """Lazy-load model, honouring memory limits, caching, dtype presets."""
      global tokenizer, model, MODEL_CTX, device, _current
-     if name == _current:
-         return  # nothing to do
-
      dbg(f"[boot] switching → {name}")
-     _unload_current()  # free VRAM first

-     if name in _loaded:  # cached
          obj = _loaded[name]
          tokenizer, model, MODEL_CTX, device = obj["tok"], obj["model"], obj["ctx"], obj["dev"]
-         _current = name
-         return

      repo = AVAILABLE_MODELS[name]
-     kwargs = {"device_map": None}  # we manage .to(...)
-     kwargs.update(dict(torch_dtype=torch.float16))
-
      tok = AutoTokenizer.from_pretrained(repo, use_fast=True)
-     mdl = AutoModelForCausalLM.from_pretrained(repo, **kwargs)
      dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      mdl.to(dev).eval()

-     ctx = getattr(mdl.config, "max_position_embeddings", 2048)
-     # Gemma-3 config reports an absurd 1e15 – clamp sensibly
-     ctx = int(min(ctx, 8192))
-
      if tok.pad_token is None:
          tok.pad_token = tok.eos_token
          mdl.config.pad_token_id = mdl.config.eos_token_id

      _loaded[name] = {"tok": tok, "model": mdl, "ctx": ctx, "dev": dev}
      tokenizer, model, MODEL_CTX, device, _current = tok, mdl, ctx, dev, name
-     dbg(f"[boot] {name} ready (ctx={ctx}, dev={dev}, dtype={mdl.dtype})")

- # prime a default so UI pops instantly
  _load("GPT-Neox-1.3B")
 
  # ──────────────────────────────────────────────────────────────────────────────
- # 3 · Utility fns
  # ──────────────────────────────────────────────────────────────────────────────
- PROMPT_HEADROOM = 300
- MAX_GEN         = 100
- def trim(txt: str, reserve: int = 80) -> str:
-     toks = tokenizer.encode(txt, add_special_tokens=False)
-     keep = MODEL_CTX - PROMPT_HEADROOM - reserve
-     return tokenizer.decode(toks[-keep:], skip_special_tokens=True) if len(toks) > keep else txt
-
- _quote = re.compile(r'"')
- def esc(s: str) -> str: return _quote.sub('\\"', s)
-
- def cosine(a: str, b: str) -> float:
-     bad = ("[Generation Error", "[Context window full]", "[Model not")
-     if any(m in a for m in bad) or any(m in b for m in bad): return 0.0
      with torch.inference_mode():
          emb = model.get_input_embeddings()
          ta = emb(tokenizer(a, return_tensors="pt").to(device).input_ids).mean(1)
          tb = emb(tokenizer(b, return_tensors="pt").to(device).input_ids).mean(1)
-     v = float(cosine_similarity(ta.cpu(), tb.cpu())[0, 0])
-     return max(min(v, 1.0), -1.0)

- # ──────────────────────────────────────────────────────────────────────────────
- # 4 · Generation (full prompt / answer into log)
- # ──────────────────────────────────────────────────────────────────────────────
- def generate(prompt: str, temp: float) -> str:
      dbg(f"PROMPT >>> {prompt}")
      with torch.inference_mode():
          inp = tokenizer(prompt, return_tensors="pt").to(device)
          out = model.generate(
              **inp,
-             max_length=min(inp.input_ids.size(1) + MAX_GEN, MODEL_CTX),
-             temperature=temp,
-             top_p=0.9,
-             repetition_penalty=1.2,
-             no_repeat_ngram_size=3,
              pad_token_id=tokenizer.pad_token_id,
          )
      ans = tokenizer.decode(out[0][inp.input_ids.size(1):], skip_special_tokens=True).strip()
      dbg(f"OUTPUT <<< {ans}")
      return ans or "[Empty]"

- # ──────────────────────────────────────────────────────────────────────────────
- # 5 · Heat-map helper
- # ──────────────────────────────────────────────────────────────────────────────
- def heat(mat: np.ndarray, labels: list[str], title: str) -> str:
-     mask = np.isnan(mat)
-     fig, ax = plt.subplots(figsize=(max(8, len(labels)), max(7, len(labels)*0.9)))
-     sns.heatmap(mat, mask=mask, annot=True, cmap="plasma", fmt=".2f",
-                 vmin=np.nanmin(mat)*0.97, vmax=1, annot_kws={"size":7},
                  xticklabels=labels, yticklabels=labels, ax=ax)
-     plt.xticks(rotation=45, ha="right"); plt.yticks(rotation=0)
-     ax.set_title(title, pad=18); plt.tight_layout(pad=2.3)
-     buf = io.BytesIO(); plt.savefig(buf, format="png"); plt.close(fig); buf.seek(0)
-     b64 = base64.b64encode(buf.read()).decode()
-     return f"<img src='data:image/png;base64,{b64}' style='max-width:95%;height:auto;'/>"
 
  # ──────────────────────────────────────────────────────────────────────────────
- # 6 · Main EAL routine
  # ──────────────────────────────────────────────────────────────────────────────
- def run_eal(iters: int, mdl_name: str, prog=gr.Progress()):
-     dbg_log.clear()
-     _load(mdl_name)
-
-     I, nI, dI, dnI, dx = [None]*iters, [None]*iters, [None]*iters, [None]*iters, [None]*iters
-     seed = "A thinking process begins. The first thought is:"
      for k in range(iters):
-         prm = seed if k == 0 else (
              f'The thought process previously generated: "{esc(trim(I[k-1],60))}"\n\n'
              "Task: Continue this line of thought. What logically follows or develops?"
          )
-         I[k] = generate(prm, 0.7)
-         prm_n = (
-             f'Consider the statement: "{esc(trim(I[k],80))}"\n\n'
-             "Task: Explore alternative perspectives or potential issues. "
-             "What might be a contrasting viewpoint or an overlooked aspect?"
-         )
-         nI[k] = generate(prm_n, 0.9)
-         if k: dI[k] = cosine(I[k-1], I[k]); dnI[k] = cosine(nI[k-1], nI[k])
-         dx[k] = cosine(I[k], nI[k])
-         prog((k+1)/iters)
-
-     # simple clustering
-     labels = [f"I{k}" for k in range(iters)] + [f"¬I{k}" for k in range(iters)]
-     vecs, val_lab = [], []
-     emb = model.get_input_embeddings()
      with torch.inference_mode():
-         for txt, lbl in zip(I+nI, labels):
-             if txt.startswith("["): continue
-             vecs.append(emb(tokenizer(txt, return_tensors="pt").to(device).input_ids).mean(1).cpu().numpy().squeeze())
-             val_lab.append(lbl)
-     clus = {l: "N/A" for l in labels}
-     if len(vecs) >= 2:
-         km = KMeans(n_clusters=2, random_state=0, n_init=10).fit(np.vstack(vecs))
-         clus.update({l: f"C{c}" for l, c in zip(val_lab, km.labels_)})
-
-     def block(seq, tag):
-         return "\n\n---\n\n".join(f"**{tag}{i} [{clus.get(f'{tag}{i}','N/A')}]**:\n{txt}" for i, txt in enumerate(seq))
-
-     tbl  = ["|Iter|ΔS(I)|ΔS(¬I)|ΔS(I,¬I)|", "|:--:|:---:|:----:|:------:|"]
-     tbl += [f"|{i}|{('N/A' if dI[i] is None else f'{dI[i]:.4f}')}|"
-             f"{('N/A' if dnI[i] is None else f'{dnI[i]:.4f}')}|"
-             f"{('N/A' if dx[i] is None else f'{dx[i]:.4f}')}|" for i in range(iters)]
-
-     n = len(labels); m = np.full((n, n), np.nan)
      for a in range(n):
-         for b in range(a, n):
-             sim = 1 if a == b else cosine((I+nI)[a], (I+nI)[b])
-             m[a, b] = m[b, a] = sim

-     return (block(I, "I"), block(nI, "¬I"), "\n".join(tbl),
-             "\n".join(dbg_log),
-             heat(m, labels, f"Similarity Matrix ({iters} iters • {mdl_name})"))
 
  # ──────────────────────────────────────────────────────────────────────────────
- # 7 · Gradio UI
  # ──────────────────────────────────────────────────────────────────────────────
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal")) as demo:
-     gr.Markdown("## EAL · Emergent Discourse Analyzer (Neox ≫ Gemma ≫ GPT-2)")
-     mdl_dd = gr.Dropdown(label="Model", choices=list(AVAILABLE_MODELS.keys()), value="GPT-Neox-1.3B")
-     iters  = gr.Slider(1, 100, 3, 1, label="Iterations")
-     run    = gr.Button("Run 🚀", variant="primary")
      with gr.Tabs():
          with gr.Tab("Traces"):
-             out_I, out_nI = gr.Markdown(), gr.Markdown()
          with gr.Tab("ΔS + Heatmap"):
-             out_tbl, out_hm = gr.Markdown(), gr.HTML()
          with gr.Tab("Debug (full prompts & answers)"):
-             out_dbg = gr.Textbox(lines=26, interactive=False, show_copy_button=True)
-     run.click(run_eal, inputs=[iters, mdl_dd], outputs=[out_I, out_nI, out_tbl, out_dbg, out_hm])

- if __name__ == "__main__":
      demo.launch()
 
  ###############################################################################
+ # app.py – EAL Emergent-Discourse Analyzer (Gemma 1 / 2 / 3 compliant)
  ###############################################################################
  import gc, io, json, re, time, base64
  import torch, numpy as np, matplotlib, matplotlib.pyplot as plt, seaborn as sns
  import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM
  from sklearn.metrics.pairwise import cosine_similarity
  from sklearn.cluster import KMeans

  matplotlib.use("Agg")  # headless
 
  # 1 · Registry of models
  # ──────────────────────────────────────────────────────────────────────────────
  AVAILABLE_MODELS = {
+     "GPT-Neox-1.3B"   : "EleutherAI/gpt-neo-1.3B",
+     "GPT-2"           : "gpt2",
+     "Gemma 1.1 2B-IT" : "google/gemma-1.1-2b-it",
+     "Gemma 2 2B-IT"   : "google/gemma-2-2b-it",
+     "Gemma 3 1B-IT"   : "google/gemma-3-1b-it",
  }

+ _loaded, _current = {}, None
  dbg_log: list[str] = []
+
  def dbg(msg: str) -> None:
+     ts = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+     line = f"[{ts}] {msg}"
      dbg_log.append(line)
      print(line)

  # ──────────────────────────────────────────────────────────────────────────────
+ # 2 · Loader helpers (BF16-aware & VRAM-safe)
  # ──────────────────────────────────────────────────────────────────────────────
+ def _gpu_supports_bf16() -> bool:
+     if not torch.cuda.is_available(): return False
+     major, _ = torch.cuda.get_device_capability()
+     return major >= 8  # Ampere (8.0) or newer
+
  def _unload_current():
      global _current
      if _current and _current in _loaded:
+         _loaded[_current]["model"].to("cpu")
+         torch.cuda.empty_cache(); gc.collect()
      _current = None

  def _load(name: str):
+     """Lazy load or swap in the requested model."""
      global tokenizer, model, MODEL_CTX, device, _current
+     if name == _current: return
      dbg(f"[boot] switching → {name}")
+     _unload_current()

+     if name in _loaded:  # cached
          obj = _loaded[name]
          tokenizer, model, MODEL_CTX, device = obj["tok"], obj["model"], obj["ctx"], obj["dev"]
+         _current = name; return

      repo = AVAILABLE_MODELS[name]
+     torch_dtype = torch.bfloat16 if _gpu_supports_bf16() else torch.float16
      tok = AutoTokenizer.from_pretrained(repo, use_fast=True)
+     mdl = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch_dtype)
      dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
      mdl.to(dev).eval()

+     ctx_raw = getattr(mdl.config, "max_position_embeddings", 2048)
+     ctx = int(min(ctx_raw, 8192))  # Gemma-3 reports 1e15 – clamp
      if tok.pad_token is None:
          tok.pad_token = tok.eos_token
          mdl.config.pad_token_id = mdl.config.eos_token_id

      _loaded[name] = {"tok": tok, "model": mdl, "ctx": ctx, "dev": dev}
      tokenizer, model, MODEL_CTX, device, _current = tok, mdl, ctx, dev, name
+     dbg(f"[boot] {name} ready (ctx={ctx}, dev={dev}, dtype={torch_dtype})")

+ # prime default
  _load("GPT-Neox-1.3B")
 
  # ──────────────────────────────────────────────────────────────────────────────
+ # 3 · Utility fns (unchanged)
  # ──────────────────────────────────────────────────────────────────────────────
+ PROMPT_HEADROOM, MAX_GEN = 300, 100
+ _q = re.compile(r'"')
+ def esc(t): return _q.sub('\\"', t)
+
+ def trim(t, rv=80):
+     toks = tokenizer.encode(t, add_special_tokens=False)
+     keep = MODEL_CTX - PROMPT_HEADROOM - rv
+     return tokenizer.decode(toks[-keep:], skip_special_tokens=True) if len(toks) > keep else t
+
+ def cosine(a, b):
+     noisy = ("[Generation Error", "[Context window full]", "[Model not")
+     if any(m in a for m in noisy) or any(m in b for m in noisy): return 0.0
      with torch.inference_mode():
          emb = model.get_input_embeddings()
          ta = emb(tokenizer(a, return_tensors="pt").to(device).input_ids).mean(1)
          tb = emb(tokenizer(b, return_tensors="pt").to(device).input_ids).mean(1)
+     return max(min(float(cosine_similarity(ta.cpu(), tb.cpu())[0, 0]), 1), -1)

+ def generate(prompt, temp):
      dbg(f"PROMPT >>> {prompt}")
      with torch.inference_mode():
          inp = tokenizer(prompt, return_tensors="pt").to(device)
          out = model.generate(
              **inp,
+             max_length=min(inp.input_ids.size(1)+MAX_GEN, MODEL_CTX),
+             temperature=temp, top_p=0.9,
+             repetition_penalty=1.2, no_repeat_ngram_size=3,
              pad_token_id=tokenizer.pad_token_id,
          )
      ans = tokenizer.decode(out[0][inp.input_ids.size(1):], skip_special_tokens=True).strip()
      dbg(f"OUTPUT <<< {ans}")
      return ans or "[Empty]"

+ def heat(mat, labels, title):
+     mask = np.isnan(mat)
+     fig, ax = plt.subplots(figsize=(max(8, len(labels)), max(7, len(labels)*0.9)))
+     sns.heatmap(mat, mask=mask, annot=True, cmap="plasma", fmt=".2f",
+                 vmin=np.nanmin(mat)*0.97, vmax=1, annot_kws={"size":7},
                  xticklabels=labels, yticklabels=labels, ax=ax)
+     plt.xticks(rotation=45, ha="right"); plt.yticks(rotation=0)
+     ax.set_title(title, pad=18); plt.tight_layout(pad=2.3)
+     buf = io.BytesIO(); plt.savefig(buf, format="png"); plt.close(fig); buf.seek(0)
+     return f"<img src='data:image/png;base64,{base64.b64encode(buf.read()).decode()}' style='max-width:95%;height:auto;'/>"
 
  # ──────────────────────────────────────────────────────────────────────────────
+ # 4 · Main EAL routine (unchanged logic)
  # ──────────────────────────────────────────────────────────────────────────────
+ def run_eal(iters: int, mdl: str, prog=gr.Progress()):
+     dbg_log.clear(); _load(mdl)
+     I, nI, dI, dnI, dx = [None]*iters, [None]*iters, [None]*iters, [None]*iters, [None]*iters
+     seed = "A thinking process begins. The first thought is:"
      for k in range(iters):
+         prm = seed if not k else (
              f'The thought process previously generated: "{esc(trim(I[k-1],60))}"\n\n'
              "Task: Continue this line of thought. What logically follows or develops?"
          )
+         I[k] = generate(prm, 0.7)
+         prm_n = (f'Consider the statement: "{esc(trim(I[k],80))}"\n\n'
+                  "Task: Explore alternative perspectives or potential issues. "
+                  "What might be a contrasting viewpoint or an overlooked aspect?")
+         nI[k] = generate(prm_n, 0.9)
+         if k: dI[k] = cosine(I[k-1], I[k]); dnI[k] = cosine(nI[k-1], nI[k])
+         dx[k] = cosine(I[k], nI[k]); prog((k+1)/iters)
+
+     # clusters
+     labels = [f"I{k}" for k in range(iters)] + [f"¬I{k}" for k in range(iters)]
+     vecs, lab = [], []
      with torch.inference_mode():
+         emb = model.get_input_embeddings()
+         for t, l in zip(I+nI, labels):
+             if t.startswith("["): continue
+             vecs.append(emb(tokenizer(t, return_tensors="pt").to(device).input_ids).mean(1).cpu().numpy().squeeze()); lab.append(l)
+     clus = {l: "N/A" for l in labels}
+     if len(vecs) >= 2: clus.update({l: f"C{c}" for l, c in zip(lab, KMeans(2, random_state=0, n_init=10).fit(np.vstack(vecs)).labels_)})
+
+     def block(seq, tag): return "\n\n---\n\n".join(f"**{tag}{i} [{clus.get(f'{tag}{i}','N/A')}]**:\n{t}" for i, t in enumerate(seq))
+     tbl  = ["|Iter|ΔS(I)|ΔS(¬I)|ΔS(I,¬I)|", "|:--:|:---:|:----:|:------:|"]
+     tbl += [f"|{i}|{('N/A' if dI[i] is None else f'{dI[i]:.4f}')}|"
+             f"{('N/A' if dnI[i] is None else f'{dnI[i]:.4f}')}|"
+             f"{('N/A' if dx[i] is None else f'{dx[i]:.4f}')}|" for i in range(iters)]
+
+     n = len(labels); mat = np.full((n, n), np.nan)
      for a in range(n):
+         for b in range(a, n):
+             sim = 1 if a == b else cosine((I+nI)[a], (I+nI)[b])
+             mat[a, b] = mat[b, a] = sim

+     return block(I, "I"), block(nI, "¬I"), "\n".join(tbl), "\n".join(dbg_log), heat(mat, labels, f"Similarity Matrix ({iters} iters • {mdl})")
 
  # ──────────────────────────────────────────────────────────────────────────────
+ # 5 · Gradio UI
  # ──────────────────────────────────────────────────────────────────────────────
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal")) as demo:
+     gr.Markdown("## EAL · Emergent-Discourse Analyzer (Gemma 1 / 2 / 3 ready)")
+     mdl_dd = gr.Dropdown(list(AVAILABLE_MODELS.keys()), value="GPT-Neox-1.3B", label="Model")
+     iters  = gr.Slider(1, 7, 3, 1, label="Iterations")
+     run    = gr.Button("Run 🚀", variant="primary")
      with gr.Tabs():
          with gr.Tab("Traces"):
+             outI, outnI = gr.Markdown(), gr.Markdown()
          with gr.Tab("ΔS + Heatmap"):
+             outTbl, outHm = gr.Markdown(), gr.HTML()
          with gr.Tab("Debug (full prompts & answers)"):
+             outDbg = gr.Textbox(lines=26, interactive=False, show_copy_button=True)
+     run.click(run_eal, [iters, mdl_dd], [outI, outnI, outTbl, outDbg, outHm])

+ if __name__ == "__main__":
      demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,5 @@
  transformers>=4.40.0
- torch==2.5.1
+ torch>=2.0.0
  scikit-learn>=1.2.0
  gradio>=4.0.0
  matplotlib==3.10.3