Spaces:
Running
Running
Upload 2 files
Browse files- .gitattributes +1 -0
- app.py +107 -22
- wiki_pages-2023-08-08.csv +3 -0
.gitattributes
CHANGED
|
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
artist_data.hdf5 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
complete_artist_data.hdf5 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
artist_data.hdf5 filter=lfs diff=lfs merge=lfs -text
|
| 37 |
complete_artist_data.hdf5 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
wiki_pages-2023-08-08.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -22,10 +22,14 @@ faq_content="""
|
|
| 22 |
|
| 23 |
## What is the purpose of this tool?
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
## Does input order matter?
|
| 31 |
|
|
@@ -33,7 +37,7 @@ No
|
|
| 33 |
|
| 34 |
## Should I use underscores or spaces in the input tags?
|
| 35 |
|
| 36 |
-
|
| 37 |
|
| 38 |
## Can I use parentheses or weights as in the Stable Diffusion Automatic1111 WebUI?
|
| 39 |
|
|
@@ -46,6 +50,10 @@ An example that illustrates acceptable parentheses and weight formatting is:
|
|
| 46 |
Some data is excluded from consideration if it did not occur frequently enough in the sample from which the application makes its calculations.
|
| 47 |
If an artist or tag is too infrequent, we might not think we have enough data to make predictions about it.
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
## Are there any special tags?
|
| 50 |
|
| 51 |
Yes. We normalized the favorite counts of each image to a range of 0-9, with 0 being the lowest favcount, and 9 being the highest.
|
|
@@ -188,6 +196,63 @@ def build_aliases_dict(filename, reverse=False):
|
|
| 188 |
else:
|
| 189 |
aliases_dict[tag] = alias_list
|
| 190 |
return aliases_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
|
| 193 |
#Imagine we are adding smoothing_value to the number of times word_j occurs in each document for smoothing.
|
|
@@ -234,9 +299,32 @@ def geometric_mean_given_words(target_word, context_words, co_occurrence_matrix,
|
|
| 234 |
|
| 235 |
return geometric_mean
|
| 236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
-
def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
| 239 |
|
|
|
|
| 240 |
#Initialize stuff
|
| 241 |
if not hasattr(find_similar_tags, "fasttext_small_model"):
|
| 242 |
find_similar_tags.fasttext_small_model = compress_fasttext.models.CompressedFastTextKeyedVectors.load('e621FastTextModel010Replacement_small.bin')
|
|
@@ -245,11 +333,15 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
|
| 245 |
find_similar_tags.tag2aliases = build_aliases_dict(tag_aliases_file)
|
| 246 |
if not hasattr(find_similar_tags, "alias2tags"):
|
| 247 |
find_similar_tags.alias2tags = build_aliases_dict(tag_aliases_file, reverse=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
transformed_tags = [tag.replace(' ', '_') for tag in test_tags]
|
| 250 |
|
| 251 |
-
# Find similar tags and prepare data for
|
| 252 |
-
|
| 253 |
for tag in test_tags:
|
| 254 |
if tag in special_tags:
|
| 255 |
continue
|
|
@@ -287,22 +379,15 @@ def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
|
| 287 |
geometric_mean = geometric_mean_given_words(word.replace(' ','_'), [context_tag for context_tag in transformed_tags if context_tag != word and context_tag != tag], conditional_co_occurrence_matrix, conditional_vocabulary, conditional_doc_count, smoothing_value=conditional_smoothing)
|
| 288 |
adjusted_score = (similarity_weight * geometric_mean) + ((1-similarity_weight)*score) # Apply the adjustment function
|
| 289 |
result[i] = (word, adjusted_score) # Update the tuple with the adjusted score
|
|
|
|
| 290 |
|
| 291 |
-
# Append tag and formatted similar tags to results_data
|
| 292 |
result = sorted(result, key=lambda x: x[1], reverse=True)[:10]
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
first_entry_for_tag = False
|
| 298 |
-
else:
|
| 299 |
-
results_data.append(["", word, sim])
|
| 300 |
-
results_data.append(["", "", ""]) # Adds a blank line after each group of tags
|
| 301 |
-
|
| 302 |
-
if not results_data:
|
| 303 |
-
results_data.append(["No Unknown Tags Found", "", ""])
|
| 304 |
|
| 305 |
-
return
|
| 306 |
|
| 307 |
def find_similar_artists(new_tags_string, top_n, similarity_weight, allow_nsfw_tags):
|
| 308 |
try:
|
|
@@ -341,7 +426,7 @@ iface = gr.Interface(
|
|
| 341 |
gr.Checkbox(label="Allow NSFW Tags", value=False)
|
| 342 |
],
|
| 343 |
outputs=[
|
| 344 |
-
gr.
|
| 345 |
gr.Textbox(label="Top Artists", info="These are the artists most strongly associated with your tags. The number in parenthes is a similarity score between 0 and 1, with higher numbers indicating greater similarity."),
|
| 346 |
gr.Textbox(label="Dynamic Prompts Format", info="For if you're using the Automatic1111 webui (https://github.com/AUTOMATIC1111/stable-diffusion-webui) with the Dynamic Prompts extension activated (https://github.com/adieyal/sd-dynamic-prompts) and want to try them all individually.")
|
| 347 |
],
|
|
|
|
| 22 |
|
| 23 |
## What is the purpose of this tool?
|
| 24 |
|
| 25 |
+
Since Stable Diffusion's initial release in 2022, users have developed a myriad of fine-tuned text to image models, each with unique "linguistic" preferences depending on the data from which it was fine-tuned.
|
| 26 |
+
Some models react best when prompted with verbose scene descriptions akin to DALL-E, while others fine-tuned on images scraped from popular image boards understand those boards' tag sets.
|
| 27 |
+
This tool serves as a linguistic bridge to the e621 image board tag lexicon, on which many popular models such as Fluffyrock, Fluffusion, and Pony Diffusion v6 were trained.
|
| 28 |
+
|
| 29 |
+
When you enter a txt2img prompt and press the "submit" button, the Tagset Completer parses your prompt and checks that all your tags are valid e621 tags.
|
| 30 |
+
If it finds any that are not, it recommends some valid e621 tags you can use to replace them in the "Unseen Tags" table.
|
| 31 |
+
Additionally, in the "Top Artists" text box, it lists the artists who would most likely draw an image having the set of tags you provided.
|
| 32 |
+
This is useful to align your prompt with the expected input to an e621-trained model.
|
| 33 |
|
| 34 |
## Does input order matter?
|
| 35 |
|
|
|
|
| 37 |
|
| 38 |
## Should I use underscores or spaces in the input tags?
|
| 39 |
|
| 40 |
+
As a rule, e621-trained models replace underscores in tags with spaces, so spaces are preferred.
|
| 41 |
|
| 42 |
## Can I use parentheses or weights as in the Stable Diffusion Automatic1111 WebUI?
|
| 43 |
|
|
|
|
| 50 |
Some data is excluded from consideration if it did not occur frequently enough in the sample from which the application makes its calculations.
|
| 51 |
If an artist or tag is too infrequent, we might not think we have enough data to make predictions about it.
|
| 52 |
|
| 53 |
+
## Why do some suggested tags not have summaries or wiki links?
|
| 54 |
+
|
| 55 |
+
Both of these features are extracted from the tag wiki pages, but some valid e621 tags do not have wiki pages.
|
| 56 |
+
|
| 57 |
## Are there any special tags?
|
| 58 |
|
| 59 |
Yes. We normalized the favorite counts of each image to a range of 0-9, with 0 being the lowest favcount, and 9 being the highest.
|
|
|
|
| 196 |
else:
|
| 197 |
aliases_dict[tag] = alias_list
|
| 198 |
return aliases_dict
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def build_tag_count_dict(filename):
    """Read a tag CSV and return a dict mapping tag name -> post count.

    Expects rows of the form [tag, ..., count, ...] with the count in the
    third column. Rows that are too short, or whose count column is not a
    non-negative integer, are skipped instead of raising.

    Parameters:
    - filename: The path to the CSV file.

    Returns:
    - A dict mapping each tag name (column 0) to its integer count (column 2).
    """
    result_dict = {}
    with open(filename, 'r', newline='', encoding='utf-8') as csvfile:
        for row in csv.reader(csvfile):
            # Guard against malformed/short rows (the original raised
            # IndexError on them) and non-numeric count columns.
            if len(row) > 2 and row[2].isdigit():
                result_dict[row[0]] = int(row[2])
    return result_dict
|
| 211 |
+
|
| 212 |
+
import csv
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def build_tag_id_wiki_dict(filename='wiki_pages-2023-08-08.csv'):
    """
    Reads a CSV file and returns a dictionary mapping tag names to tuples of
    (number, most relevant line from the wiki entry). Rows with a non-integer
    in the first column, or too few columns, are ignored.
    The most relevant line is the first line that does not start with "thumb"
    and is not blank.

    Parameters:
    - filename: The path to the CSV file.

    Returns:
    - A dictionary where each key is a tag name and each value is a tuple
      (number, most relevant wiki entry line).
    """
    tag_data = {}
    with open(filename, 'r', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)

        # Skip the header row; tolerate a completely empty file (a bare
        # next(reader) would raise StopIteration here).
        if next(reader, None) is None:
            return tag_data

        for row in reader:
            try:
                # Non-integer ids and rows too short to hold the tag name
                # (col 3) and wiki text (col 4) are skipped.
                number = int(row[0])
                tag = row[3]
                wiki_entry_full = row[4]
            except (ValueError, IndexError):
                continue

            # First non-blank line that is not a "thumb" image directive.
            relevant_line = next(
                (line for line in wiki_entry_full.split('\n')
                 if line.strip() and not line.startswith("thumb")),
                '',
            )

            # Map the tag to a tuple of (number, relevant_line)
            tag_data[tag] = (number, relevant_line)

    return tag_data
|
| 256 |
|
| 257 |
|
| 258 |
#Imagine we are adding smoothing_value to the number of times word_j occurs in each document for smoothing.
|
|
|
|
| 299 |
|
| 300 |
return geometric_mean
|
| 301 |
|
| 302 |
+
|
| 303 |
+
def create_html_tables_for_tags(tag, result, tag2count, tag2idwiki):
    """Render one HTML table of suggested replacements for an unknown tag.

    Parameters:
    - tag: The unknown input tag the suggestions are for.
    - result: Iterable of (word, similarity) pairs, best first.
    - tag2count: Dict mapping underscore-form tags to post counts.
    - tag2idwiki: Dict mapping underscore-form tags to (wiki id, wiki summary).

    Returns:
    - An HTML string: an inline-block <div> containing the table.
    """
    import html  # local import: only needed here, for escaping user text

    # Escape user-controlled text so quotes/angle brackets cannot break the
    # markup or inject HTML (tags and wiki summaries are untrusted input).
    safe_tag = html.escape(tag, quote=True)
    # Wrap the tag part in a <span> with styles for bold and larger font
    html_str = f"<div style='display: inline-block; margin: 20px; vertical-align: top;'><table><thead><tr><th colspan='3' style='text-align: center; padding-bottom: 10px;'>Unknown Tag: <span style='font-weight: bold; font-size: 20px;'>{safe_tag}</span></th></tr></thead><tbody><tr style='border-bottom: 1px solid #000;'><th>Corrected Tag</th><th>Similarity</th><th>Count</th></tr>"
    # Loop through the results and add table rows for each
    for word, sim in result:
        word_with_underscores = word.replace(' ', '_')
        count = tag2count.get(word_with_underscores, 0)  # Default to 0 when unknown
        tag_id, wiki_entry = tag2idwiki.get(word_with_underscores, (None, ''))
        safe_word = html.escape(word, quote=True)
        # Check if tag_id and wiki_entry are valid
        if tag_id is not None and wiki_entry:
            # Construct the URL for the tag's wiki page
            wiki_url = f"https://e621.net/wiki_pages/{tag_id}"
            # Make the tag a hyperlink with a tooltip (escaped for the
            # single-quoted title attribute)
            tag_element = f"<a href='{wiki_url}' target='_blank' title='{html.escape(wiki_entry, quote=True)}'>{safe_word}</a>"
        else:
            # Display the word without any hyperlink or tooltip
            tag_element = safe_word
        # Include the tag element in the table row
        html_str += f"<tr><td style='border: none; padding: 5px; height: 20px;'>{tag_element}</td><td style='border: none; padding: 5px; height: 20px;'>{round(sim, 3)}</td><td style='border: none; padding: 5px; height: 20px;'>{count}</td></tr>"

    html_str += "</tbody></table></div>"
    return html_str
|
| 325 |
|
|
|
|
| 326 |
|
| 327 |
+
def find_similar_tags(test_tags, similarity_weight, allow_nsfw_tags):
|
| 328 |
#Initialize stuff
|
| 329 |
if not hasattr(find_similar_tags, "fasttext_small_model"):
|
| 330 |
find_similar_tags.fasttext_small_model = compress_fasttext.models.CompressedFastTextKeyedVectors.load('e621FastTextModel010Replacement_small.bin')
|
|
|
|
| 333 |
find_similar_tags.tag2aliases = build_aliases_dict(tag_aliases_file)
|
| 334 |
if not hasattr(find_similar_tags, "alias2tags"):
|
| 335 |
find_similar_tags.alias2tags = build_aliases_dict(tag_aliases_file, reverse=True)
|
| 336 |
+
if not hasattr(find_similar_tags, "tag2count"):
|
| 337 |
+
find_similar_tags.tag2count = build_tag_count_dict(tag_aliases_file)
|
| 338 |
+
if not hasattr(find_similar_tags, "tag2idwiki"):
|
| 339 |
+
find_similar_tags.tag2idwiki = build_tag_id_wiki_dict()
|
| 340 |
|
| 341 |
transformed_tags = [tag.replace(' ', '_') for tag in test_tags]
|
| 342 |
|
| 343 |
+
# Find similar tags and prepare data for tables
|
| 344 |
+
html_content = ""
|
| 345 |
for tag in test_tags:
|
| 346 |
if tag in special_tags:
|
| 347 |
continue
|
|
|
|
| 379 |
geometric_mean = geometric_mean_given_words(word.replace(' ','_'), [context_tag for context_tag in transformed_tags if context_tag != word and context_tag != tag], conditional_co_occurrence_matrix, conditional_vocabulary, conditional_doc_count, smoothing_value=conditional_smoothing)
|
| 380 |
adjusted_score = (similarity_weight * geometric_mean) + ((1-similarity_weight)*score) # Apply the adjustment function
|
| 381 |
result[i] = (word, adjusted_score) # Update the tuple with the adjusted score
|
| 382 |
+
#print(word, score, geometric_mean, adjusted_score)
|
| 383 |
|
|
|
|
| 384 |
result = sorted(result, key=lambda x: x[1], reverse=True)[:10]
|
| 385 |
+
html_content += create_html_tables_for_tags(tag, result, find_similar_tags.tag2count, find_similar_tags.tag2idwiki)
|
| 386 |
+
# If no tags were processed, add a message
|
| 387 |
+
if not html_content:
|
| 388 |
+
html_content = "<p>No Unknown Tags Found</p>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
|
| 390 |
+
return html_content # Return list of lists for Dataframe
|
| 391 |
|
| 392 |
def find_similar_artists(new_tags_string, top_n, similarity_weight, allow_nsfw_tags):
|
| 393 |
try:
|
|
|
|
| 426 |
gr.Checkbox(label="Allow NSFW Tags", value=False)
|
| 427 |
],
|
| 428 |
outputs=[
|
| 429 |
+
gr.HTML(label="Unseen Tags"),
|
| 430 |
gr.Textbox(label="Top Artists", info="These are the artists most strongly associated with your tags. The number in parenthes is a similarity score between 0 and 1, with higher numbers indicating greater similarity."),
|
| 431 |
gr.Textbox(label="Dynamic Prompts Format", info="For if you're using the Automatic1111 webui (https://github.com/AUTOMATIC1111/stable-diffusion-webui) with the Dynamic Prompts extension activated (https://github.com/adieyal/sd-dynamic-prompts) and want to try them all individually.")
|
| 432 |
],
|
wiki_pages-2023-08-08.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d453c0cc8ae09c548e554ceb77b1c1578c277eb2c5a6278a85f89c73566a7b27
|
| 3 |
+
size 30986436
|