Spaces:
Runtime error
Runtime error
Fix length issue
Browse files
app.py
CHANGED
@@ -49,6 +49,7 @@ def really_clean_tokens(tokens):
|
|
49 |
def generate_and_visualize(prompt, num_tokens=10):
|
50 |
input_ids = tokenizer(prompt, return_tensors="pt", add_special_tokens=True).input_ids.to(model.device)
|
51 |
input_embeds = model.get_input_embeddings()(input_ids)
|
|
|
52 |
|
53 |
generated_tokens_ids = []
|
54 |
all_relevances = []
|
@@ -70,7 +71,6 @@ def generate_and_visualize(prompt, num_tokens=10):
|
|
70 |
if next_token.item() == tokenizer.eos_token_id:
|
71 |
print("EOS token generated, stopping generation.")
|
72 |
break
|
73 |
-
input_tokens = really_clean_tokens(tokenizer.convert_ids_to_tokens(input_ids[0]))
|
74 |
generated_tokens = really_clean_tokens(tokenizer.convert_ids_to_tokens(generated_tokens_ids))
|
75 |
|
76 |
return input_tokens, all_relevances, generated_tokens
|
@@ -80,7 +80,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
80 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
81 |
|
82 |
### FIND ZONES OF INTEREST
|
83 |
-
threshold_per_token = 0.
|
84 |
kernel_width = 6
|
85 |
context_width = 20 # Number of tokens to include as context on each side
|
86 |
kernel = np.ones((kernel_width, kernel_width))
|
@@ -93,7 +93,8 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
93 |
|
94 |
# Find where the rolled sum is greater than the threshold
|
95 |
significant_areas = rolled_sum > threshold_per_token
|
96 |
-
print(f"Found {significant_areas.sum()} relevant tokens
|
|
|
97 |
|
98 |
def find_largest_contiguous_patch(array):
|
99 |
current_patch_start = None
|
@@ -200,14 +201,15 @@ css = """
|
|
200 |
max-width: 600px;
|
201 |
width:500px;
|
202 |
word-wrap: break-word;
|
203 |
-
z-index:
|
204 |
}
|
205 |
.hoverable:hover .hover-note { display: block; }
|
206 |
"""
|
207 |
examples = [
|
208 |
-
"""Context:
|
|
|
209 |
|
210 |
-
Question: How high did they climb in 1922?
|
211 |
"""Hurricane Katrina killed hundreds of people as it made landfall on New Orleans in 2005 - many of these deaths could have been avoided if alerts had been given one day earlier. Accurate weather forecasts are really life-saving.
|
212 |
|
213 |
🔥 Now, NASA and IBM just dropped a game-changing new model: the first ever foundation model for weather! This means, it's the first time we have a generalist model not restricted to one task, but able to predict 160 weather variables!
|
@@ -230,7 +232,7 @@ How can you build tools simply in transformers?
|
|
230 |
Answer:""",
|
231 |
]
|
232 |
|
233 |
-
with gr.Blocks(css=css) as demo:
|
234 |
gr.Markdown("# Syntax highlighted text generation - for RAG applications")
|
235 |
|
236 |
input_text = gr.Textbox(label="Enter your prompt:", lines=10, value=examples[0])
|
|
|
49 |
def generate_and_visualize(prompt, num_tokens=10):
|
50 |
input_ids = tokenizer(prompt, return_tensors="pt", add_special_tokens=True).input_ids.to(model.device)
|
51 |
input_embeds = model.get_input_embeddings()(input_ids)
|
52 |
+
input_tokens = really_clean_tokens(tokenizer.convert_ids_to_tokens(input_ids[0]))
|
53 |
|
54 |
generated_tokens_ids = []
|
55 |
all_relevances = []
|
|
|
71 |
if next_token.item() == tokenizer.eos_token_id:
|
72 |
print("EOS token generated, stopping generation.")
|
73 |
break
|
|
|
74 |
generated_tokens = really_clean_tokens(tokenizer.convert_ids_to_tokens(generated_tokens_ids))
|
75 |
|
76 |
return input_tokens, all_relevances, generated_tokens
|
|
|
80 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
81 |
|
82 |
### FIND ZONES OF INTEREST
|
83 |
+
threshold_per_token = 0.2
|
84 |
kernel_width = 6
|
85 |
context_width = 20 # Number of tokens to include as context on each side
|
86 |
kernel = np.ones((kernel_width, kernel_width))
|
|
|
93 |
|
94 |
# Find where the rolled sum is greater than the threshold
|
95 |
significant_areas = rolled_sum > threshold_per_token
|
96 |
+
print(f"Found {significant_areas.sum()} relevant tokens: lower threshold to find more. Max was {rolled_sum.max()}")
|
97 |
+
print("LENGTHS:", len(input_tokens), significant_areas.shape, len(generated_tokens))
|
98 |
|
99 |
def find_largest_contiguous_patch(array):
|
100 |
current_patch_start = None
|
|
|
201 |
max-width: 600px;
|
202 |
width:500px;
|
203 |
word-wrap: break-word;
|
204 |
+
z-index: 100;
|
205 |
}
|
206 |
.hoverable:hover .hover-note { display: block; }
|
207 |
"""
|
208 |
examples = [
|
209 |
+
"""Context:
|
210 |
+
The first recorded efforts to reach Everest's summit were made by British mountaineers. As Nepal did not allow foreigners to enter the country at the time, the British made several attempts on the north ridge route from the Tibetan side. After the first reconnaissance expedition by the British in 1921 reached 7,000 m (22,970 ft) on the North Col, the 1922 expedition pushed the north ridge route up to 8,320 m (27,300 ft), marking the first time a human had climbed above 8,000 m (26,247 ft). The 1924 expedition resulted in one of the greatest mysteries on Everest to this day: George Mallory and Andrew Irvine made a final summit attempt on 8 June but never returned, sparking debate as to whether they were the first to reach the top. Tenzing Norgay and Edmund Hillary made the first documented ascent of Everest in 1953, using the southeast ridge route. Norgay had reached 8,595 m (28,199 ft) the previous year as a member of the 1952 Swiss expedition. The Chinese mountaineering team of Wang Fuzhou, Gonpo, and Qu Yinhua made the first reported ascent of the peak from the north ridge on 25 May 1960.
|
211 |
|
212 |
+
Question: How high did they climb in 1922? Answer:""",
|
213 |
"""Hurricane Katrina killed hundreds of people as it made landfall on New Orleans in 2005 - many of these deaths could have been avoided if alerts had been given one day earlier. Accurate weather forecasts are really life-saving.
|
214 |
|
215 |
🔥 Now, NASA and IBM just dropped a game-changing new model: the first ever foundation model for weather! This means, it's the first time we have a generalist model not restricted to one task, but able to predict 160 weather variables!
|
|
|
232 |
Answer:""",
|
233 |
]
|
234 |
|
235 |
+
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
236 |
gr.Markdown("# Syntax highlighted text generation - for RAG applications")
|
237 |
|
238 |
input_text = gr.Textbox(label="Enter your prompt:", lines=10, value=examples[0])
|