context-probing / app.py
cifkao's picture
Enable compact layout
af8abd6
raw
history blame
5.15 kB
from enum import Enum
from pathlib import Path
import streamlit as st
import streamlit.components.v1 as components
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer, BatchEncoding
# Directory that contains this script; the component's build output is
# resolved relative to it.
root_dir = Path(__file__).resolve().parent

# Custom Streamlit component, loaded from the "highlighted_text/build"
# directory next to this file.
highlighted_text_component = components.declare_component(
    name="highlighted_text",
    path=root_dir / "highlighted_text" / "build",
)
def get_windows_batched(examples: BatchEncoding, window_len: int, stride: int = 1, pad_id: int = 0) -> BatchEncoding:
    """Cut every sequence in *examples* into sliding windows of equal length.

    For sequence ``i``, one window is produced for each start offset in
    ``range(0, len(examples["input_ids"][i]) - 1, stride)``, so no window
    starts at the final position of a sequence. Each window is the slice
    ``[start : start + window_len]`` of the corresponding field; a window that
    runs past the end of the sequence is right-padded up to ``window_len``.

    Args:
        examples: Mapping from field name to a list of per-sequence lists.
            Must contain ``"input_ids"``, which determines the window starts.
        window_len: Length of every emitted window.
        stride: Distance between consecutive window starts.
        pad_id: Padding value used for the ``"input_ids"`` field; all other
            fields are padded with ``0``.

    Returns:
        A ``BatchEncoding`` with the same keys as *examples*, each mapping to
        the flat list of windows of all sequences, in sequence order.
    """
    windowed = {}
    for field, sequences in examples.items():
        # Only the token ids use the caller-supplied pad value.
        filler = pad_id if field == "input_ids" else 0
        field_windows = []
        for seq_idx in range(len(examples["input_ids"])):
            # Window starts are always derived from the "input_ids" lengths,
            # whichever field is currently being sliced.
            for start in range(0, len(examples["input_ids"][seq_idx]) - 1, stride):
                sequence = sequences[seq_idx]
                stop = start + window_len
                # A non-positive multiplier yields an empty list, so windows
                # that fit entirely inside the sequence get no padding.
                padding = [filler] * (stop - len(sequence))
                field_windows.append(sequence[start:stop] + padding)
        windowed[field] = field_windows
    return BatchEncoding(windowed)
# U+FFFD REPLACEMENT CHARACTER; its presence in decoded text is treated as
# "these ids do not form complete text yet".
BAD_CHAR = "\ufffd"


def ids_to_readable_tokens(tokenizer, ids, strip_whitespace=False):
    """Turn token ids into display strings, one string per id.

    Ids are accumulated and decoded together with ``tokenizer.decode`` until
    the decoded text contains no ``BAD_CHAR``. The id that completes such a
    run receives the whole decoded text; every earlier id of the run receives
    the empty string. Ids left pending at the end therefore all map to ``""``.

    NOTE(review): presumably ``BAD_CHAR`` shows up when a token ends in the
    middle of a multi-byte character -- confirm against the tokenizer in use.

    Args:
        tokenizer: Object exposing ``decode(list_of_ids) -> str``.
        ids: Iterable of token ids.
        strip_whitespace: If true, strip leading/trailing whitespace from
            each decoded string.

    Returns:
        A list of strings with exactly one entry per input id.
    """
    readable = []
    pending = []
    for token_id in ids:
        pending.append(token_id)
        text = tokenizer.decode(pending)
        if BAD_CHAR in text:
            # Not decodable yet: emit a placeholder and keep accumulating.
            readable.append("")
            continue
        readable.append(text.strip() if strip_whitespace else text)
        pending.clear()
    return readable
# "?compact=true" in the URL suppresses the page header below.
compact_layout = (
    st.experimental_get_query_params().get("compact", ["false"]) == ["true"]
)

if not compact_layout:
    st.header("Context length probing")

# --- User-facing controls (rendered in call order) ---
model_name = st.selectbox(
    label="Model",
    options=["distilgpt2", "gpt2", "EleutherAI/gpt-neo-125m"],
)
metric_name = st.selectbox(
    label="Metric",
    options=["KL divergence", "Cross entropy"],
    index=1,
)
window_len = st.select_slider(
    label=r"Window size ($c_\text{max}$)",
    options=[8, 16, 32, 64, 128, 256, 512, 1024],
    value=512,
)

# Pre-filled example text; newlines are flattened to spaces so it is shown
# as a single paragraph.
DEFAULT_TEXT = """
We present context length probing, a novel explanation technique for causal
language models, based on tracking the predictions of a model as a function of the length of
available context, and allowing to assign differential importance scores to different contexts.
The technique is model-agnostic and does not rely on access to model internals beyond computing
token-level probabilities. We apply context length probing to large pre-trained language models
and offer some initial analyses and insights, including the potential for studying long-range
dependencies.
""".replace("\n", " ").strip()

text = st.text_area(label="Input text", value=DEFAULT_TEXT)

# Only the cross-entropy metric is implemented; bail out for anything else.
if metric_name == "KL divergence":
    st.error("KL divergence is not supported yet. Stay tuned!", icon="😭")
    st.stop()

# The loaders are wrapped in st.cache_resource so they are cached by
# Streamlit rather than re-run on every script execution.
with st.spinner("Loading model…"):
    tokenizer = st.cache_resource(
        AutoTokenizer.from_pretrained, show_spinner=False
    )(model_name)
    model = st.cache_resource(
        AutoModelForCausalLM.from_pretrained, show_spinner=False
    )(model_name)

# Tokenize as a batch of one and unpack the single sequence of ids.
inputs = tokenizer([text])
(input_ids,) = inputs["input_ids"]

if len(input_ids) < 2:
    st.error("Please enter at least 2 tokens.", icon="🚨")
    st.stop()

# Never use a window longer than the input itself.
window_len = min(window_len, len(input_ids))
@st.cache_data(show_spinner=False)
@torch.inference_mode()
def get_logits(_model, _inputs, cache_key):
    """Run *_model* on one batch and return its logits as ``float16``.

    Args:
        _model: Callable model; invoked as ``_model(**_inputs)`` and expected
            to return an object with a ``logits`` tensor attribute.
        _inputs: Mapping of keyword arguments for the model call.
        cache_key: Value that identifies this call for ``st.cache_data``.
            The underscore-prefixed parameters are excluded from Streamlit's
            argument hashing, so this is the only argument that
            distinguishes one cached result from another.

    Returns:
        The model's logits converted to ``torch.float16``.
    """
    # The key has already served its purpose (being hashed by the cache
    # decorator); it is not used in the computation.
    del cache_key
    outputs = _model(**_inputs)
    half_precision_logits = outputs.logits.to(torch.float16)
    return half_precision_logits
@st.cache_data(show_spinner=False)
@torch.inference_mode()
def run_context_length_probing(_model, _tokenizer, _inputs, window_len, cache_key):
    """Compute normalized context-length-probing scores for one tokenized text.

    The text is cut into sliding windows (one per start position), the model
    is run on the windows in batches, and the log-probability of every target
    token is collected for each context length from 1 up to ``window_len``.
    The returned scores are the differences between consecutive context
    lengths, i.e. how much the target's log-probability changes when the
    context is extended by one more token to the left.

    Args:
        _model: Causal language model, called through ``get_logits``.
        _tokenizer: Tokenizer; only ``eos_token_id`` is read (used as the
            padding id for windows that run past the end of the text).
        _inputs: Tokenizer output for a batch containing exactly one text,
            with list-valued fields (not tensors).
        window_len: Maximum context length; must not exceed the number of
            tokens in the text.
        cache_key: Value hashed by ``st.cache_data`` to identify the call
            (the underscore-prefixed parameters are not hashed).

    Returns:
        A ``float16`` tensor of shape ``(num_tokens - 1, window_len - 1)``.
        Row ``t`` belongs to target token ``t + 1``; column ``c`` is the
        change in its log-probability when the context grows from ``c + 1``
        to ``c + 2`` tokens. Entries for contexts that would start before
        the beginning of the text are ``0``. Each row is scaled by its
        maximum absolute value.

    Raises:
        ValueError: If ``_inputs["input_ids"]`` does not hold exactly one
            sequence.

    Note:
        The module-level ``model_name`` is still read to build the
        per-batch cache key for ``get_logits``.
    """
    del cache_key

    # Take the ids from the argument rather than from the module-level
    # ``input_ids`` so the result depends only on what was passed in.
    [input_ids] = _inputs["input_ids"]
    targets = input_ids[1:]

    inputs_sliding = get_windows_batched(
        _inputs,
        window_len=window_len,
        pad_id=_tokenizer.eos_token_id,
    ).convert_to_tensors("pt")

    logits = []
    with st.spinner("Running model…"):
        batch_size = 8
        num_items = len(inputs_sliding["input_ids"])
        pbar = st.progress(0)
        for i in range(0, num_items, batch_size):
            pbar.progress(i / num_items, f"{i}/{num_items}")
            batch = {k: v[i:i + batch_size] for k, v in inputs_sliding.items()}
            # Key the cached forward pass on every field's name, shape and
            # contents. Raw ``input_ids`` bytes alone drop the tensor shape
            # and ignore the attention mask, so distinct batches could map
            # to the same cache entry.
            batch_key = tuple(
                (name, tuple(tensor.shape), tensor.cpu().numpy().tobytes())
                for name, tensor in batch.items()
            )
            logits.append(
                get_logits(
                    _model,
                    batch,
                    cache_key=(model_name, batch_key),
                )
            )
        # (num_windows, window_len, vocab)
        logits = torch.cat(logits, dim=0)
        pbar.empty()

    with st.spinner("Computing scores…"):
        # (window_len, num_windows, vocab): axis 0 is the position inside a
        # window, axis 1 is the window start.
        logits = logits.permute(1, 0, 2)
        # Append ``window_len`` NaN columns along axis 1, then drop the last
        # ``window_len`` rows of the flattened tensor and re-view it with
        # rows one element shorter. This shifts row ``w`` right by ``w``
        # columns, so column ``c`` of every row refers to the same target
        # token (index ``c + 1``); cells with no real context are NaN.
        logits = F.pad(logits, (0, 0, 0, window_len, 0, 0), value=torch.nan)
        logits = logits.view(-1, logits.shape[-1])[:-window_len]
        logits = logits.view(window_len, len(input_ids) + window_len - 2, logits.shape[-1])

        scores = logits.to(torch.float32).log_softmax(dim=-1)
        # Pick the log-probability of the actual next token in each column.
        scores = scores[:, torch.arange(len(targets)), targets]
        # Difference between consecutive context lengths, then put target
        # tokens on axis 0.
        scores = scores.diff(dim=0).transpose(0, 1)
        # NaN marks contexts reaching before the start of the text.
        scores = scores.nan_to_num()
        # Per-target normalization; the epsilon avoids division by zero.
        scores /= scores.abs().max(dim=1, keepdim=True).values + 1e-9
        scores = scores.to(torch.float16)

    return scores
# One display string per input token id.
tokens = ids_to_readable_tokens(tokenizer, input_ids)

# Cached on (model, text) through the explicit cache key; the window length
# is a regular (hashed) argument.
scores = run_context_length_probing(
    _model=model,
    _tokenizer=tokenizer,
    _inputs=inputs,
    window_len=window_len,
    cache_key=(model_name, text),
)

# Hand tokens and scores (as nested Python lists) to the custom component.
highlighted_text_component(tokens=tokens, scores=scores.tolist())