Spaces:

yeq6x
/

Image2Body_gradio

Runtime error

App Files Community

yeq6x committed on Jul 7, 2024

Commit

ce12cd7

1 Parent(s): 2c91007

refactor

Browse files

Files changed (2) hide show

scripts/generate_prompt.py +48 -119
scripts/process_utils.py +3 -5

scripts/generate_prompt.py CHANGED Viewed

@@ -10,145 +10,74 @@ from tensorflow.keras.layers import TFSMLayer
 from huggingface_hub import hf_hub_download
 from pathlib import Path
-# from wd14 tagger
 IMAGE_SIZE = 448
-# wd-v1-4-swinv2-tagger-v2 / wd-v1-4-vit-tagger / wd-v1-4-vit-tagger-v2/ wd-v1-4-convnext-tagger / wd-v1-4-convnext-tagger-v2
-DEFAULT_WD14_TAGGER_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
-FILES = ["keras_metadata.pb", "saved_model.pb", "selected_tags.csv"]
-SUB_DIR = "variables"
-SUB_DIR_FILES = ["variables.data-00000-of-00001", "variables.index"]
-CSV_FILE = FILES[-1]
 def preprocess_image(image):
-    image = np.array(image)
-    image = image[:, :, ::-1]  # RGB->BGR
-    # pad to square
-    size = max(image.shape[0:2])
-    pad_x = size - image.shape[1]
-    pad_y = size - image.shape[0]
-    pad_l = pad_x // 2
-    pad_t = pad_y // 2
-    image = np.pad(image, ((pad_t, pad_y - pad_t), (pad_l, pad_x - pad_l), (0, 0)), mode="constant", constant_values=255)
     interp = cv2.INTER_AREA if size > IMAGE_SIZE else cv2.INTER_LANCZOS4
-    image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE), interpolation=interp)
-    image = image.astype(np.float32)
-    return image
 def load_wd14_tagger_model():
     model_dir = "wd14_tagger_model"
-    repo_id = DEFAULT_WD14_TAGGER_REPO
     if not os.path.exists(model_dir):
-        print(f"downloading wd14 tagger model from hf_hub. id: {repo_id}")
-        for file in FILES:
-            hf_hub_download(repo_id, file, cache_dir=model_dir, force_download=True, force_filename=file)
-        for file in SUB_DIR_FILES:
-            hf_hub_download(
-                repo_id,
-                file,
-                subfolder=SUB_DIR,
-                cache_dir=model_dir + "/" + SUB_DIR,
-                force_download=True,
-                force_filename=file,
-            )
     else:
-        print("using existing wd14 tagger model")
-    # モデルを読み込む
-    model = TFSMLayer(model_dir, call_endpoint='serving_default')
-    return model
-def generate_tags(images, model_dir, model):
-    with open(os.path.join(model_dir, CSV_FILE), "r", encoding="utf-8") as f:
         reader = csv.reader(f)
-        l = [row for row in reader]
-        header = l[0]  # tag_id,name,category,count
-        rows = l[1:]
-    assert header[0] == "tag_id" and header[1] == "name" and header[2] == "category", f"unexpected csv format: {header}"
-    general_tags = [row[1] for row in rows[1:] if row[2] == "0"]
-    character_tags = [row[1] for row in rows[1:] if row[2] == "4"]
     tag_freq = {}
-    undesired_tags = ['one-piece_swimsuit',
-                      'swimsuit',
-                      'leotard',
-                      'saitama_(one-punch_man)',
-                      '1boy',
-    ]
-    probs = model(images, training=False)
-    probs = probs['predictions_sigmoid'].numpy()
     tag_text_list = []
     for prob in probs:
-        combined_tags = []
-        general_tag_text = ""
-        character_tag_text = ""
-        thresh = 0.35
         for i, p in enumerate(prob[4:]):
-            if i < len(general_tags) and p >= thresh:
-                tag_name = general_tags[i]
-                if tag_name not in undesired_tags:
-                    tag_freq[tag_name] = tag_freq.get(tag_name, 0) + 1
-                    general_tag_text += ", " + tag_name
-                    combined_tags.append(tag_name)
-            elif i >= len(general_tags) and p >= thresh:
-                tag_name = character_tags[i - len(general_tags)]
-                if tag_name not in undesired_tags:
-                    tag_freq[tag_name] = tag_freq.get(tag_name, 0) + 1
-                    character_tag_text += ", " + tag_name
-                    combined_tags.append(tag_name)
-        if len(general_tag_text) > 0:
-            general_tag_text = general_tag_text[2:]
-        if len(character_tag_text) > 0:
-            character_tag_text = character_tag_text[2:]
-        tag_text = ", ".join(combined_tags)
-        tag_text_list.append(tag_text)
     return tag_text_list
-def generate_prompt_json(target_folder, prompt_file, model_dir, model):
-    image_files = [f for f in os.listdir(target_folder) if os.path.isfile(os.path.join(target_folder, f))]
-    image_count = len(image_files)
-    prompt_list = []
-    for i, filename in enumerate(image_files, 1):
-        source_path = "source/" + filename
-        target_path = os.path.join(target_folder, filename)  # Use absolute path
-        target_path2 = "target/" + filename
-        prompt = generate_tags(target_path, model_dir, model)
-        for j in range(4):
-            prompt_data = {
-                "source": f"{source_path.split('.')[0]}_{j}.jpg",
-                "target": f"{target_path2.split('.')[0]}_{j}.jpg",
-                "prompt": prompt
-            }
-            prompt_list.append(prompt_data)
-        print(f"Processed Images: {i}/{image_count}", end="\r", flush=True)
-    with open(prompt_file, "w") as file:
-        for prompt_data in prompt_list:
-            json.dump(prompt_data, file)
-            file.write("\n")
-    print(f"Processing completed. Total Images: {image_count}")
-if __name__ == '__main__':
-    model_dir = "wd14_tagger_model"
-    model = load_wd14_tagger_model()
-    prompt = generate_tags(target_path, model_dir, model)

 from huggingface_hub import hf_hub_download
 from pathlib import Path
+# 画像サイズの設定
 IMAGE_SIZE = 448
+# デフォルトのタグ付けリポジトリとファイル構成
+DEFAULT_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
+MODEL_FILES = ["keras_metadata.pb", "saved_model.pb", "selected_tags.csv"]
+VAR_DIR = "variables"
+VAR_FILES = ["variables.data-00000-of-00001", "variables.index"]
+CSV_FILE = MODEL_FILES[-1]
 def preprocess_image(image):
+    """画像を前処理して正方形に変換"""
+    img = np.array(image)[:, :, ::-1]  # RGB->BGR
+    size = max(img.shape[:2])
+    pad_x, pad_y = size - img.shape[1], size - img.shape[0]
+    img = np.pad(img, ((pad_y // 2, pad_y - pad_y // 2), (pad_x // 2, pad_x - pad_x // 2), (0, 0)), mode="constant", constant_values=255)
     interp = cv2.INTER_AREA if size > IMAGE_SIZE else cv2.INTER_LANCZOS4
+    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=interp)
+    return img.astype(np.float32)
+def download_model_files(repo_id, model_dir, sub_dir, files, sub_files):
+    """モデルファイルをHugging Face Hubからダウンロード"""
+    for file in files:
+        hf_hub_download(repo_id, file, cache_dir=model_dir, force_download=True, force_filename=file)
+    for file in sub_files:
+        hf_hub_download(repo_id, file, subfolder=sub_dir, cache_dir=os.path.join(model_dir, sub_dir), force_download=True, force_filename=file)
 def load_wd14_tagger_model():
+    """WD14タグ付けモデルをロード"""
     model_dir = "wd14_tagger_model"
     if not os.path.exists(model_dir):
+        download_model_files(DEFAULT_REPO, model_dir, VAR_DIR, MODEL_FILES, VAR_FILES)
     else:
+        print("Using existing model")
+    return TFSMLayer(model_dir, call_endpoint='serving_default')
+def read_tags_from_csv(csv_path):
+    """CSVファイルからタグを読み取る"""
+    with open(csv_path, "r", encoding="utf-8") as f:
         reader = csv.reader(f)
+        tags = [row for row in reader]
+    header = tags[0]
+    rows = tags[1:]
+    assert header[:3] == ["tag_id", "name", "category"], f"Unexpected CSV format: {header}"
+    return rows
+def generate_tags(images, model_dir, model):
+    """画像にタグを生成"""
+    rows = read_tags_from_csv(os.path.join(model_dir, CSV_FILE))
+    general_tags = [row[1] for row in rows if row[2] == "0"]
+    character_tags = [row[1] for row in rows if row[2] == "4"]
     tag_freq = {}
+    undesired_tags = {'one-piece_swimsuit', 'swimsuit', 'leotard', 'saitama_(one-punch_man)', '1boy'}
+    probs = model(images, training=False)['predictions_sigmoid'].numpy()
     tag_text_list = []
     for prob in probs:
+        tags_combined = []
         for i, p in enumerate(prob[4:]):
+            tag_list = general_tags if i < len(general_tags) else character_tags
+            tag = tag_list[i - len(general_tags)] if i >= len(general_tags) else tag_list[i]
+            if p >= 0.35 and tag not in undesired_tags:
+                tag_freq[tag] = tag_freq.get(tag, 0) + 1
+                tags_combined.append(tag)
+        tag_text_list.append(", ".join(tags_combined))
     return tag_text_list

scripts/process_utils.py CHANGED Viewed

@@ -40,9 +40,9 @@ def initialize(_use_local=False, use_gpu=False, use_dotenv=False):
     device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16 if device == "cuda" else torch.float32
     use_local = _use_local
-    print('')
-    print(f"Device: {device}, Local model: {_use_local}")
-    print('')
     init_model(use_local)
     model = load_wd14_tagger_model()
     sotai_gen_pipe = initialize_sotai_model()
@@ -59,7 +59,6 @@ def initialize_sotai_model():
     controlnet_path1 =  get_file_path(os.environ["controlnet_name1"], subfolder=os.environ["controlnet_dir2"])
     # controlnet_path1 =  get_file_path(os.environ["controlnet_name2"], subfolder=os.environ["controlnet_dir1"])
     controlnet_path2 =  get_file_path(os.environ["controlnet_name2"], subfolder=os.environ["controlnet_dir1"])
-    print(use_local, controlnet_path1)
     # Load the Stable Diffusion model
     sd_pipe = StableDiffusionPipeline.from_single_file(
@@ -294,7 +293,6 @@ def process_image(input_image, mode: str, weight1: float = 0.4, weight2: float =
         image_np = np.array(ensure_rgb(input_image))
         prompt = get_wd_tags([image_np])[0]
         prompt = f"{prompt}"
-        print(prompt)
         refined_image = generate_refined_image(prompt, input_image, output_width, output_height, weight1, weight2)
         refined_image = refined_image.convert('RGB')

     device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16 if device == "cuda" else torch.float32
     use_local = _use_local
+    print(f"\nDevice: {device}, Local model: {_use_local}\n")
     init_model(use_local)
     model = load_wd14_tagger_model()
     sotai_gen_pipe = initialize_sotai_model()
     controlnet_path1 =  get_file_path(os.environ["controlnet_name1"], subfolder=os.environ["controlnet_dir2"])
     # controlnet_path1 =  get_file_path(os.environ["controlnet_name2"], subfolder=os.environ["controlnet_dir1"])
     controlnet_path2 =  get_file_path(os.environ["controlnet_name2"], subfolder=os.environ["controlnet_dir1"])
     # Load the Stable Diffusion model
     sd_pipe = StableDiffusionPipeline.from_single_file(
         image_np = np.array(ensure_rgb(input_image))
         prompt = get_wd_tags([image_np])[0]
         prompt = f"{prompt}"
         refined_image = generate_refined_image(prompt, input_image, output_width, output_height, weight1, weight2)
         refined_image = refined_image.convert('RGB')